On Mon, Oct 23, 2017 at 09:03:33PM +0200, Uros Bizjak wrote: > >> As for addcarry/subborrow, the problem is that we expect in the pr67317* > >> tests that combine is able to notice that the CF setter sets CF to > >> unconditional 0 and matches the pattern. With the patch I wrote > >> we end up with the combiner trying to match an insn where the CCC > >> is set from a TImode comparison: > >> (parallel [ > >> (set (reg:CC 17 flags) > >> (compare:CC (zero_extend:TI (plus:DI (reg/v:DI 92 [ a ]) > >> (reg/v:DI 94 [ c ]))) > >> (zero_extend:TI (reg/v:DI 94 [ c ])))) > >> (set (reg:DI 98) > >> (plus:DI (reg/v:DI 92 [ a ]) > >> (reg/v:DI 94 [ c ]))) > >> ]) > >> So, either we need a define_insn_and_split pattern that would deal with > >> that (for UNSPEC it would be the same thing, have a define_insn_and_split > >> that would replace the (ltu...) with (const_int 0)), or perhaps be smarter > >> during expansion, if we see the first argument is constant 0, expand it > >> like a normal add instruction with CC setter.
This patch fixes the addcarry/subborrow patterns and deals with the pr67317* regressions by special casing the first argument 0 case already at expansion rather than waiting for combine. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2017-10-24 Jakub Jelinek <ja...@redhat.com> PR target/82628 * config/i386/i386.md (addcarry<mode>, subborrow<mode>): Change patterns to better describe from which operation the CF is computed. (addcarry<mode>_0, subborrow<mode>_0): New patterns. * config/i386/i386.c (ix86_expand_builtin) <case handlecarry>: Pass one LTU with [DT]Imode and another one with [SD]Imode. If arg0 is 0, use _0 suffixed expanders instead of emitting a comparison before it. --- gcc/config/i386/i386.c.jj 2017-10-23 12:52:20.711575924 +0200 +++ gcc/config/i386/i386.c 2017-10-23 15:59:07.379391029 +0200 @@ -35050,10 +35050,10 @@ ix86_expand_builtin (tree exp, rtx targe machine_mode mode, int ignore) { size_t i; - enum insn_code icode; + enum insn_code icode, icode2; tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); tree arg0, arg1, arg2, arg3, arg4; - rtx op0, op1, op2, op3, op4, pat, insn; + rtx op0, op1, op2, op3, op4, pat, pat2, insn; machine_mode mode0, mode1, mode2, mode3, mode4; unsigned int fcode = DECL_FUNCTION_CODE (fndecl); @@ -36028,22 +36028,34 @@ rdseed_step: case IX86_BUILTIN_SBB32: icode = CODE_FOR_subborrowsi; + icode2 = CODE_FOR_subborrowsi_0; mode0 = SImode; + mode1 = DImode; + mode2 = CCmode; goto handlecarry; case IX86_BUILTIN_SBB64: icode = CODE_FOR_subborrowdi; + icode2 = CODE_FOR_subborrowdi_0; mode0 = DImode; + mode1 = TImode; + mode2 = CCmode; goto handlecarry; case IX86_BUILTIN_ADDCARRYX32: icode = CODE_FOR_addcarrysi; + icode2 = CODE_FOR_addcarrysi_0; mode0 = SImode; + mode1 = DImode; + mode2 = CCCmode; goto handlecarry; case IX86_BUILTIN_ADDCARRYX64: icode = CODE_FOR_addcarrydi; + icode2 = CODE_FOR_addcarrydi_0; mode0 = DImode; + mode1 = TImode; + mode2 = CCCmode; handlecarry: arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */ @@ -36052,7 +36064,8 @@ rdseed_step: arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */ op1 = expand_normal (arg0); - op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1)); + if (!integer_zerop (arg0)) + op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1)); op2 = expand_normal (arg1); if (!register_operand (op2, mode0)) @@ -36069,21 +36082,31 @@ rdseed_step: op4 = copy_addr_to_reg (op4); } - /* Generate CF from input operand. */ - emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx)); - - /* Generate instruction that consumes CF. */ op0 = gen_reg_rtx (mode0); + if (integer_zerop (arg0)) + { + /* If arg0 is 0, optimize right away into add or sub + instruction that sets CCCmode flags. */ + op1 = gen_rtx_REG (mode2, FLAGS_REG); + emit_insn (GEN_FCN (icode2) (op0, op2, op3)); + } + else + { + /* Generate CF from input operand. */ + emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx)); - op1 = gen_rtx_REG (CCCmode, FLAGS_REG); - pat = gen_rtx_LTU (mode0, op1, const0_rtx); - emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat)); + /* Generate instruction that consumes CF. */ + op1 = gen_rtx_REG (CCCmode, FLAGS_REG); + pat = gen_rtx_LTU (mode1, op1, const0_rtx); + pat2 = gen_rtx_LTU (mode0, op1, const0_rtx); + emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2)); + } /* Return current CF value. */ if (target == 0) target = gen_reg_rtx (QImode); - PUT_MODE (pat, QImode); + pat = gen_rtx_LTU (QImode, op1, const0_rtx); emit_insn (gen_rtx_SET (target, pat)); /* Store the result. */ --- gcc/config/i386/i386.md.jj 2017-10-23 12:52:20.701576051 +0200 +++ gcc/config/i386/i386.md 2017-10-23 15:53:36.013585636 +0200 @@ -6840,15 +6840,19 @@ (define_insn "*addsi3_carry_zext" (define_insn "addcarry<mode>" [(set (reg:CCC FLAGS_REG) (compare:CCC - (plus:SWI48 + (zero_extend:<DWI> (plus:SWI48 - (match_operator:SWI48 4 "ix86_carry_flag_operator" - [(match_operand 3 "flags_reg_operand") (const_int 0)]) - (match_operand:SWI48 1 "nonimmediate_operand" "%0")) - (match_operand:SWI48 2 "nonimmediate_operand" "rm")) - (match_dup 1))) + (plus:SWI48 + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI48 1 "nonimmediate_operand" "%0")) + (match_operand:SWI48 2 "nonimmediate_operand" "rm"))) + (plus:<DWI> + (zero_extend:<DWI> (match_dup 2)) + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)])))) (set (match_operand:SWI48 0 "register_operand" "=r") - (plus:SWI48 (plus:SWI48 (match_op_dup 4 + (plus:SWI48 (plus:SWI48 (match_op_dup 5 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 2)))] @@ -6859,6 +6863,18 @@ (define_insn "addcarry<mode>" (set_attr "pent_pair" "pu") (set_attr "mode" "<MODE>")]) +(define_expand "addcarry<mode>_0" + [(parallel + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand") + (match_operand:SWI48 2 "x86_64_general_operand")) + (match_dup 1))) + (set (match_operand:SWI48 0 "register_operand") + (plus:SWI48 (match_dup 1) (match_dup 2)))])] + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)") + (define_insn "sub<mode>3_carry" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") (minus:SWI @@ -6941,15 +6957,18 @@ (define_insn "sub<mode>3_carry_ccgz" (define_insn "subborrow<mode>" [(set (reg:CCC FLAGS_REG) (compare:CCC - (match_operand:SWI48 1 "nonimmediate_operand" "0") - (plus:SWI48 - (match_operator:SWI48 4 "ix86_carry_flag_operator" - [(match_operand 3 "flags_reg_operand") (const_int 0)]) - (match_operand:SWI48 2 "nonimmediate_operand" "rm")))) + (zero_extend:<DWI> + (match_operand:SWI48 1 "nonimmediate_operand" "0")) + (plus:<DWI> + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (zero_extend:<DWI> + (match_operand:SWI48 2 "nonimmediate_operand" "rm"))))) (set (match_operand:SWI48 0 "register_operand" "=r") - (minus:SWI48 (minus:SWI48 (match_dup 1) - (match_op_dup 4 - [(match_dup 3) (const_int 0)])) + (minus:SWI48 (minus:SWI48 + (match_dup 1) + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)])) (match_dup 2)))] "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" "sbb{<imodesuffix>}\t{%2, %0|%0, %2}" @@ -6957,6 +6976,16 @@ (define_insn "subborrow<mode>" (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "<MODE>")]) + +(define_expand "subborrow<mode>_0" + [(parallel + [(set (reg:CC FLAGS_REG) + (compare:CC + (match_operand:SWI48 1 "nonimmediate_operand") + (match_operand:SWI48 2 "<general_operand>"))) + (set (match_operand:SWI48 0 "register_operand") + (minus:SWI48 (match_dup 1) (match_dup 2)))])] + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)") ;; Overflow setting add instructions Jakub