On Wed, Dec 12, 2018 at 11:38 PM Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > As mentioned in the PR, we can use kxor? %kN, %kN, %kN or > kxnor? %kN, %kN, %kN to set %kN to 0 or -1, instead of > setting a GPR to that and moving to the mask register. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2018-12-12 Jakub Jelinek <ja...@redhat.com> > > PR target/88465 > * config/i386/i386.md (*movdi_internal, *movsi_internal, > *movhi_internal, *movqi_internal): Add alternative(s) to load > 0 or -1 into k registers using kxor or kxnoq instructions. > > * gcc.target/i386/avx512f-pr88465.c: New test. > * gcc.target/i386/avx512dq-pr88465.c: New test.
LGTM, with two adjustments, described below. Thanks, Uros. > > --- gcc/config/i386/i386.md.jj 2018-12-12 15:51:42.232521901 +0100 > +++ gcc/config/i386/i386.md 2018-12-12 17:56:17.530670339 +0100 > @@ -2056,9 +2056,9 @@ (define_split > > (define_insn "*movdi_internal" > [(set (match_operand:DI 0 "nonimmediate_operand" > - "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r > ,?*Yd,?r,?*v,?*y,?*x,*k,*k ,*r,*m") > + "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r > ,?*Yd,?r,?*v,?*y,?*x,*k,*k ,*r,*m,*k") > (match_operand:DI 1 "general_operand" > - "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*y,r ,C ,*v,m ,*v,v,*Yd,r ,*v,r > ,*x ,*y ,*r,*km,*k,*k"))] > + "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*y,r ,C ,*v,m ,*v,v,*Yd,r ,*v,r > ,*x ,*y ,*r,*km,*k,*k,*CBC"))] No need to decorate CBC with "*". There is no register to allocate. > "!(MEM_P (operands[0]) && MEM_P (operands[1]))" > { > switch (get_attr_type (insn)) > @@ -2066,6 +2066,13 @@ (define_insn "*movdi_internal" > case TYPE_MSKMOV: > return "kmovq\t{%1, %0|%0, %1}"; > > + case TYPE_MSKLOG: > + if (operands[1] == const0_rtx) > + return "kxorq\t%0, %0, %0"; > + else if (operands[1] == constm1_rtx) > + return "kxnorq\t%0, %0, %0"; > + gcc_unreachable (); > + > case TYPE_MULTI: > return "#"; > > @@ -2159,6 +2166,8 @@ (define_insn "*movdi_internal" > (const_string "ssecvt") > (eq_attr "alternative" "23,24,25,26") > (const_string "mskmov") > + (eq_attr "alternative" "27") > + (const_string "msklog") > (and (match_operand 0 "register_operand") > (match_operand 1 "pic_32bit_operand")) > (const_string "lea") > @@ -2296,9 +2305,9 @@ (define_peephole2 > > (define_insn "*movsi_internal" > [(set (match_operand:SI 0 "nonimmediate_operand" > - "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k ,*rm") > + "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k ,*rm,*k") > (match_operand:SI 1 "general_operand" > - "g ,re,C ,*y,m ,*y,*y,r ,C ,*v,m ,*v,*v,r ,*r,*km,*k"))] > + "g ,re,C ,*y,m ,*y,*y,r ,C ,*v,m ,*v,*v,r ,*r,*km,*k ,*CBC"))] Same here. > "!(MEM_P (operands[0]) && MEM_P (operands[1]))" > { > switch (get_attr_type (insn)) > @@ -2309,6 +2318,13 @@ (define_insn "*movsi_internal" > case TYPE_MSKMOV: > return "kmovd\t{%1, %0|%0, %1}"; > > + case TYPE_MSKLOG: > + if (operands[1] == const0_rtx) > + return "kxord\t%0, %0, %0"; > + else if (operands[1] == constm1_rtx) > + return "kxnord\t%0, %0, %0"; > + gcc_unreachable (); > + > case TYPE_SSEMOV: > switch (get_attr_mode (insn)) > { > @@ -2375,6 +2391,8 @@ (define_insn "*movsi_internal" > (const_string "ssemov") > (eq_attr "alternative" "14,15,16") > (const_string "mskmov") > + (eq_attr "alternative" "17") > + (const_string "msklog") > (and (match_operand 0 "register_operand") > (match_operand 1 "pic_32bit_operand")) > (const_string "lea") > @@ -2419,8 +2437,8 @@ (define_insn "*movsi_internal" > (symbol_ref "true")))]) > > (define_insn "*movhi_internal" > - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k ,r,m") > - (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,r,km,k,k"))] > + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k ,r,m,k") > + (match_operand:HI 1 "general_operand" "r > ,rn,rm,rn,r,km,k,k,CBC"))] > "!(MEM_P (operands[0]) && MEM_P (operands[1]))" > { > switch (get_attr_type (insn)) > @@ -2444,6 +2462,13 @@ (define_insn "*movhi_internal" > gcc_unreachable (); > } > > + case TYPE_MSKLOG: > + if (operands[1] == const0_rtx) > + return "kxorw\t%0, %0, %0"; > + else if (operands[1] == constm1_rtx) > + return "kxnorw\t%0, %0, %0"; > + gcc_unreachable (); > + > default: > if (get_attr_mode (insn) == MODE_SI) > return "mov{l}\t{%k1, %k0|%k0, %k1}"; > @@ -2454,6 +2479,8 @@ (define_insn "*movhi_internal" > [(set (attr "type") > (cond [(eq_attr "alternative" "4,5,6,7") > (const_string "mskmov") > + (eq_attr "alternative" "8") > + (const_string "msklog") > (match_test "optimize_function_for_size_p (cfun)") > (const_string "imov") > (and (eq_attr "alternative" "0") > @@ -2469,7 +2496,7 @@ (define_insn "*movhi_internal" > ] > (const_string "imov"))) > (set (attr "prefix") > - (if_then_else (eq_attr "alternative" "4,5,6,7") > + (if_then_else (eq_attr "alternative" "4,5,6,7,8") > (const_string "vex") > (const_string "orig"))) > (set (attr "mode") > @@ -2498,9 +2525,9 @@ (define_insn "*movhi_internal" > > (define_insn "*movqi_internal" > [(set (match_operand:QI 0 "nonimmediate_operand" > - "=Q,R,r,q,q,r,r ,?r,m ,k,k,r,m,k") > + "=Q,R,r,q,q,r,r ,?r,m ,k,k,r,m,k,k,k") > (match_operand:QI 1 "general_operand" > - "Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m"))] > + "Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m,C,BC"))] > "!(MEM_P (operands[0]) && MEM_P (operands[1]))" > { > static char buf[128]; > @@ -2538,6 +2565,21 @@ (define_insn "*movqi_internal" > snprintf (buf, sizeof (buf), ops, suffix); > return buf; > > + case TYPE_MSKLOG: > + if (operands[1] == const0_rtx) > + { > + if (get_attr_mode (insn) == MODE_HI) > + return "kxorw\t%0, %0, %0"; > + else > + return "kxorb\t%0, %0, %0"; > + } > + else if (operands[1] == constm1_rtx) > + { > + gcc_assert (TARGET_AVX512DQ); > + return "kxnorb\t%0, %0, %0"; > + } > + gcc_unreachable (); > + > default: > if (get_attr_mode (insn) == MODE_SI) > return "mov{l}\t{%k1, %k0|%k0, %k1}"; > @@ -2548,13 +2590,15 @@ (define_insn "*movqi_internal" > [(set (attr "isa") > (cond [(eq_attr "alternative" "1,2") > (const_string "x64") > - (eq_attr "alternative" "12,13") > + (eq_attr "alternative" "12,13,15") > (const_string "avx512dq") > ] > (const_string "*"))) > (set (attr "type") > (cond [(eq_attr "alternative" "9,10,11,12,13") > (const_string "mskmov") > + (eq_attr "alternative" "14,15") > + (const_string "msklog") > (and (eq_attr "alternative" "7") > (not (match_operand:QI 1 "aligned_operand"))) > (const_string "imovx") > @@ -2572,7 +2616,7 @@ (define_insn "*movqi_internal" > ] > (const_string "imov"))) > (set (attr "prefix") > - (if_then_else (eq_attr "alternative" "9,10,11") > + (if_then_else (eq_attr "alternative" "9,10,11,12,13,14,15") > (const_string "vex") > (const_string "orig"))) > (set (attr "mode") > @@ -2580,7 +2624,7 @@ (define_insn "*movqi_internal" > (const_string "SI") > (eq_attr "alternative" "8") > (const_string "QI") > - (and (eq_attr "alternative" "9,10,11") > + (and (eq_attr "alternative" "9,10,11,14") > (not (match_test "TARGET_AVX512DQ"))) > (const_string "HI") > (eq_attr "type" "imovx") > --- gcc/testsuite/gcc.target/i386/avx512f-pr88465.c.jj 2018-12-12 > 18:04:01.112113389 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512f-pr88465.c 2018-12-12 > 18:03:17.105830744 +0100 > @@ -0,0 +1,21 @@ > +/* PR target/88465 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512f -mno-avx512dq -mno-avx512bw" } */ > +/* { dg-final { scan-assembler-times "kxorw\[ \t]" 2 } } */ > +/* { dg-final { scan-assembler-times "kxnorw\[ \t]" 1 } } */ > + > +void > +foo (void) > +{ > + unsigned short int k = 0; > + __asm volatile ("" : : "k" (k)); > + k = -1; > + __asm volatile ("" : : "k" (k)); > +} > + > +void > +bar (void) > +{ > + unsigned char k = 0; > + __asm volatile ("" : : "k" (k)); > +} > --- gcc/testsuite/gcc.target/i386/avx512dq-pr88465.c.jj 2018-12-12 > 18:04:55.962219263 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512dq-pr88465.c 2018-12-12 > 18:04:31.959610537 +0100 > @@ -0,0 +1,14 @@ > +/* PR target/88465 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512dq -mno-avx512bw" } */ > +/* { dg-final { scan-assembler-times "kxorb\[ \t]" 1 } } */ > +/* { dg-final { scan-assembler-times "kxnorb\[ \t]" 1 } } */ > + > +void > +foo (void) > +{ > + unsigned char k = 0; > + __asm volatile ("" : : "k" (k)); > + k = -1; > + __asm volatile ("" : : "k" (k)); > +} > > Jakub