On Wed, Dec 12, 2018 at 11:34 PM Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > The following patch adds =*k, *km alternatives to the various > zero_extend?i?i2 patterns, because kmov* instructions actually zero-extend > the destination to 64 bits. I've kept * for the alternatives because that > is what is used e.g. in *mov[sd]i_internal patterns (probably to make it > less likely that RA chooses to perform arithmetic unrelated to masks in > the mask registers). > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2018-12-12 Jakub Jelinek <ja...@redhat.com> > > PR target/88461 > * config/i386/i386.md (*zero_extendsidi2, zero_extend<mode>di2, > *zero_extend<mode>si2, *zero_extendqihi2): Add =*k, *km alternatives.
LGTM. Thanks, Uros. > --- gcc/config/i386/i386.md.jj 2018-12-11 15:49:52.919342538 +0100 > +++ gcc/config/i386/i386.md 2018-12-12 15:51:42.232521901 +0100 > @@ -3751,10 +3751,10 @@ (define_expand "zero_extendsidi2" > > (define_insn "*zero_extendsidi2" > [(set (match_operand:DI 0 "nonimmediate_operand" > - "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r") > + "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r,*k") > (zero_extend:DI > (match_operand:SI 1 "x86_64_zext_operand" > - "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,*k")))] > + "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,*k,*km")))] > "" > { > switch (get_attr_type (insn)) > @@ -3806,6 +3806,8 @@ (define_insn "*zero_extendsidi2" > (const_string "avx512f") > (eq_attr "alternative" "12") > (const_string "x64_avx512bw") > + (eq_attr "alternative" "13") > + (const_string "avx512bw") > ] > (const_string "*"))) > (set (attr "type") > @@ -3819,7 +3821,7 @@ (define_insn "*zero_extendsidi2" > (const_string "multi")) > (eq_attr "alternative" "8,9,10,11") > (const_string "ssemov") > - (eq_attr "alternative" "12") > + (eq_attr "alternative" "12,13") > (const_string "mskmov") > ] > (const_string "imovx"))) > @@ -3881,16 +3883,17 @@ (define_mode_attr kmov_isa > [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")]) > > (define_insn "zero_extend<mode>di2" > - [(set (match_operand:DI 0 "register_operand" "=r,*r") > + [(set (match_operand:DI 0 "register_operand" "=r,*r,*k") > (zero_extend:DI > - (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k")))] > + (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))] > "TARGET_64BIT" > "@ > movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1} > + kmov<mskmodesuffix>\t{%1, %k0|%k0, %1} > kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}" > - [(set_attr "isa" "*,<kmov_isa>") > - (set_attr "type" "imovx,mskmov") > - (set_attr "mode" "SI,<MODE>")]) > + [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>") > + (set_attr "type" "imovx,mskmov,mskmov") > + (set_attr "mode" "SI,<MODE>,<MODE>")]) > > (define_expand 
"zero_extend<mode>si2" > [(set (match_operand:SI 0 "register_operand") > @@ -3933,16 +3936,17 @@ (define_insn_and_split "zero_extend<mode > (set_attr "mode" "SI")]) > > (define_insn "*zero_extend<mode>si2" > - [(set (match_operand:SI 0 "register_operand" "=r,*r") > + [(set (match_operand:SI 0 "register_operand" "=r,*r,*k") > (zero_extend:SI > - (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k")))] > + (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))] > "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))" > "@ > movz{<imodesuffix>l|x}\t{%1, %0|%0, %1} > + kmov<mskmodesuffix>\t{%1, %0|%0, %1} > kmov<mskmodesuffix>\t{%1, %0|%0, %1}" > - [(set_attr "isa" "*,<kmov_isa>") > - (set_attr "type" "imovx,mskmov") > - (set_attr "mode" "SI,<MODE>")]) > + [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>") > + (set_attr "type" "imovx,mskmov,mskmov") > + (set_attr "mode" "SI,<MODE>,<MODE>")]) > > (define_expand "zero_extendqihi2" > [(set (match_operand:HI 0 "register_operand") > @@ -3985,15 +3989,16 @@ (define_insn_and_split "zero_extendqihi2 > > ; zero extend to SImode to avoid partial register stalls > (define_insn "*zero_extendqihi2" > - [(set (match_operand:HI 0 "register_operand" "=r,*r") > - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k")))] > + [(set (match_operand:HI 0 "register_operand" "=r,*r,*k") > + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" > "qm,*k,*km")))] > "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))" > "@ > movz{bl|x}\t{%1, %k0|%k0, %1} > - kmovb\t{%1, %k0|%k0, %1}" > - [(set_attr "isa" "*,avx512dq") > - (set_attr "type" "imovx,mskmov") > - (set_attr "mode" "SI,QI")]) > + kmovb\t{%1, %k0|%k0, %1} > + kmovb\t{%1, %0|%0, %1}" > + [(set_attr "isa" "*,avx512dq,avx512dq") > + (set_attr "type" "imovx,mskmov,mskmov") > + (set_attr "mode" "SI,QI,QI")]) > > (define_insn_and_split "*zext<mode>_doubleword_and" > [(set (match_operand:DI 0 "register_operand" "=&<r>") > > Jakub