Hi! As mentioned in the PR, kmovb/w uses 8/16 bit memory or %k? register, only if it uses GPR it uses 32-bit register. Therefore, this patch ensures that %k[01] is only used if the operand matches "r" constraint, but not when it matches "m" constraint ("k" constraint has not been using that already before).
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2016-03-02 Jakub Jelinek <ja...@redhat.com> PR target/70028 * config/i386/i386.md (kmovw): Move m constraint to 2nd alternative. (*movhi_internal): Put mask moves from and to memory separately from moves from/to GPRs. * gcc.target/i386/pr70028.c: New test. --- gcc/config/i386/i386.md.jj 2016-02-10 16:01:58.000000000 +0100 +++ gcc/config/i386/i386.md 2016-03-01 20:06:10.724647196 +0100 @@ -2442,7 +2442,7 @@ (define_insn "*movsi_internal" (define_insn "kmovw" [(set (match_operand:HI 0 "nonimmediate_operand" "=k,k") (unspec:HI - [(match_operand:HI 1 "nonimmediate_operand" "rm,k")] + [(match_operand:HI 1 "nonimmediate_operand" "r,km")] UNSPEC_KMOV))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && TARGET_AVX512F" "@ @@ -2454,8 +2454,8 @@ (define_insn "kmovw" (define_insn "*movhi_internal" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k,rm") - (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,rm,k,k"))] + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k, r,m") + (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,r,km,k,k"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2469,7 +2469,8 @@ (define_insn "*movhi_internal" switch (which_alternative) { case 4: return "kmovw\t{%k1, %0|%0, %k1}"; - case 5: return "kmovw\t{%1, %0|%0, %1}"; + case 5: /* FALLTHRU */ + case 7: return "kmovw\t{%1, %0|%0, %1}"; case 6: return "kmovw\t{%1, %k0|%k0, %1}"; default: gcc_unreachable (); } @@ -2482,7 +2483,7 @@ (define_insn "*movhi_internal" } } [(set (attr "type") - (cond [(eq_attr "alternative" "4,5,6") + (cond [(eq_attr "alternative" "4,5,6,7") (const_string "mskmov") (match_test "optimize_function_for_size_p (cfun)") (const_string "imov") @@ -2499,7 +2500,7 @@ (define_insn "*movhi_internal" ] (const_string "imov"))) (set (attr "prefix") - (if_then_else (eq_attr "alternative" "4,5,6") + (if_then_else (eq_attr "alternative" "4,5,6,7") (const_string "vex") (const_string "orig"))) (set (attr "mode") --- gcc/testsuite/gcc.target/i386/pr70028.c.jj 2016-03-01 20:11:09.142593365 +0100 +++ gcc/testsuite/gcc.target/i386/pr70028.c 2016-03-01 20:13:52.122377175 +0100 @@ -0,0 +1,19 @@ +/* PR target/70028 */ +/* { dg-do assemble { target int128 } } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target masm_intel } */ +/* { dg-options "-O2 -fno-forward-propagate -mavx512bw -masm=intel" } */ + +typedef unsigned short A; +typedef int B __attribute__ ((vector_size (32))); +typedef unsigned __int128 C; +typedef __int128 D __attribute__ ((vector_size (32))); + +C +foo (A a, int b, unsigned c, C d, A e, unsigned f, B g, D h) +{ + g[1] ^= (A) ~ a; + a ^= (unsigned) g[0]; + h %= (D) h | 1; + return a + b + c + d + e + g[0] + g[1] + h[1]; +} Jakub