x86 shift and rotate instructions already mask the count operand by themselves. Instead of inventing a new insn, just split the combined pattern into a normal shift insn.
2016-08-16  Uros Bizjak  <ubiz...@gmail.com>

	* config/i386/i386.md (*ashl<mode>3_mask): Rewrite define_insn
	pattern as define_insn_and_split.  Split insn before reload to
	ashl<mode>3_1.
	(*<shift_insn><mode>3_mask): Ditto.  Split insn before reload to
	<shift_insn><mode>3_1.
	(*<rotate_insn><mode>3_mask): Ditto.  Split insn before reload to
	<rotate_insn><mode>3_1.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline.

Uros.
Index: i386.md =================================================================== --- i386.md (revision 239502) +++ i386.md (working copy) @@ -9794,23 +9794,27 @@ }) ;; Avoid useless masking of count operand. -(define_insn "*ashl<mode>3_mask" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") +(define_insn_and_split "*ashl<mode>3_mask" + [(set (match_operand:SWI48 0 "nonimmediate_operand") (ashift:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "0") + (match_operand:SWI48 1 "nonimmediate_operand") (subreg:QI (and:SI - (match_operand:SI 2 "register_operand" "c") - (match_operand:SI 3 "const_int_operand" "n")) 0))) + (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "const_int_operand")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands) && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) - == GET_MODE_BITSIZE (<MODE>mode)-1" -{ - return "sal{<imodesuffix>}\t{%b2, %0|%0, %b2}"; -} - [(set_attr "type" "ishift") - (set_attr "mode" "<MODE>")]) + == GET_MODE_BITSIZE (<MODE>mode)-1 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (ashift:SWI48 (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = gen_lowpart (QImode, operands[2]);") (define_insn "*bmi2_ashl<mode>3_1" [(set (match_operand:SWI48 0 "register_operand" "=r") @@ -10290,23 +10294,27 @@ "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;") ;; Avoid useless masking of count operand. 
-(define_insn "*<shift_insn><mode>3_mask" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") +(define_insn_and_split "*<shift_insn><mode>3_mask" + [(set (match_operand:SWI48 0 "nonimmediate_operand") (any_shiftrt:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "0") + (match_operand:SWI48 1 "nonimmediate_operand") (subreg:QI (and:SI - (match_operand:SI 2 "register_operand" "c") - (match_operand:SI 3 "const_int_operand" "n")) 0))) + (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "const_int_operand")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) - == GET_MODE_BITSIZE (<MODE>mode)-1" -{ - return "<shift>{<imodesuffix>}\t{%b2, %0|%0, %b2}"; -} - [(set_attr "type" "ishift") - (set_attr "mode" "<MODE>")]) + == GET_MODE_BITSIZE (<MODE>mode)-1 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (any_shiftrt:SWI48 (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = gen_lowpart (QImode, operands[2]);") (define_insn_and_split "*<shift_insn><mode>3_doubleword" [(set (match_operand:DWI 0 "register_operand" "=r") @@ -10745,23 +10753,27 @@ "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;") ;; Avoid useless masking of count operand. 
-(define_insn "*<rotate_insn><mode>3_mask" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") +(define_insn_and_split "*<rotate_insn><mode>3_mask" + [(set (match_operand:SWI48 0 "nonimmediate_operand") (any_rotate:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "0") + (match_operand:SWI48 1 "nonimmediate_operand") (subreg:QI (and:SI - (match_operand:SI 2 "register_operand" "c") - (match_operand:SI 3 "const_int_operand" "n")) 0))) + (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "const_int_operand")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) - == GET_MODE_BITSIZE (<MODE>mode)-1" -{ - return "<rotate>{<imodesuffix>}\t{%b2, %0|%0, %b2}"; -} - [(set_attr "type" "rotate") - (set_attr "mode" "<MODE>")]) + == GET_MODE_BITSIZE (<MODE>mode)-1 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (any_rotate:SWI48 (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = gen_lowpart (QImode, operands[2]);") ;; Implement rotation using two double-precision ;; shift instructions and a scratch register.