Hi! This is very similar to the previous PR93141 addv<mode>4 half and improves signed __builtin_sub_overflow on double-words rather than __builtin_add_overflow.
I have left out the uaddv<mode>4 double-word stuff, because I ran into issues with it - as the pattern uses (set (reg:CC flags) (compare:CC (reg:TI) (reg:TI))) as the first set, ifcvt.c (canonicalize_condition it calls) extracts the TImode comparison arguments from it, but as we don't have a cmpti3 pattern, noce_try_store_flag fails and we don't manage to convert branchy code into setc. Even for smaller modes ifcvt in that case does weird things, after the sub instruction which sets the flags too it emits a cmp with the same arguments and only postreload we notice the compare is redundant and remove it. So, dunno if we want to improve somehow ifcvt and for conditions where we'd look through a multiple set instructions somehow try harder to reuse the original instruction, or if usub<dwi>4 should not use compare:CC of the operands and instead use some obfuscation to only set CCCmode to prevent ifcvt from punting on it. Anyway, this part works, bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2020-01-07 Jakub Jelinek <ja...@redhat.com> PR target/93141 (subv<mode>4): Use SWIDWI iterator instead of SWI. Use <general_hilo_operand> instead of <general_operand>. Use CONST_SCALAR_INT_P instead of CONST_INT_P. (*subv<mode>4_1): Rename to ... (subv<mode>4_1): ... this. (*subv<dwi>4_doubleword, *addv<dwi>4_doubleword_1): New define_insn_and_split patterns. (*subv<mode>4_overflow_1, *addv<mode>4_overflow_2): New define_insn patterns. * gcc.target/i386/pr93141-1.c: Add tests with constants that have MSB of the low half of the constant set. * gcc.target/i386/pr93141-2.c: New test. --- gcc/config/i386/i386.md.jj 2020-01-07 08:01:22.618613702 +0100 +++ gcc/config/i386/i386.md 2020-01-07 10:07:01.770587027 +0100 @@ -6569,16 +6569,17 @@ (define_insn "*subsi_2_zext" ;; Subtract with jump on overflow. (define_expand "subv<mode>4" [(parallel [(set (reg:CCO FLAGS_REG) - (eq:CCO (minus:<DWI> - (sign_extend:<DWI> - (match_operand:SWI 1 "nonimmediate_operand")) - (match_dup 4)) - (sign_extend:<DWI> - (minus:SWI (match_dup 1) - (match_operand:SWI 2 - "<general_operand>"))))) - (set (match_operand:SWI 0 "register_operand") - (minus:SWI (match_dup 1) (match_dup 2)))]) + (eq:CCO + (minus:<DWI> + (sign_extend:<DWI> + (match_operand:SWIDWI 1 "nonimmediate_operand")) + (match_dup 4)) + (sign_extend:<DWI> + (minus:SWIDWI (match_dup 1) + (match_operand:SWIDWI 2 + "<general_hilo_operand>"))))) + (set (match_operand:SWIDWI 0 "register_operand") + (minus:SWIDWI (match_dup 1) (match_dup 2)))]) (set (pc) (if_then_else (eq (reg:CCO FLAGS_REG) (const_int 0)) (label_ref (match_operand 3)) @@ -6586,7 +6587,7 @@ (define_expand "subv<mode>4" "" { ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands); - if (CONST_INT_P (operands[2])) + if (CONST_SCALAR_INT_P (operands[2])) operands[4] = operands[2]; else operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]); @@ -6608,7 +6609,7 @@ (define_insn "*subv<mode>4" [(set_attr "type" "alu") (set_attr "mode" "<MODE>")]) -(define_insn "*subv<mode>4_1" +(define_insn "subv<mode>4_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (minus:<DWI> (sign_extend:<DWI> @@ -6633,6 +6634,162 @@ (define_insn "*subv<mode>4_1" (const_string "4")] (const_string "<MODE_SIZE>")))]) +(define_insn_and_split "*subv<dwi>4_doubleword" + [(set (reg:CCO FLAGS_REG) + (eq:CCO + (minus:<QWI> + (sign_extend:<QWI> + (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")) + (sign_extend:<QWI> + (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o"))) + (sign_extend:<QWI> + (minus:<DWI> (match_dup 1) (match_dup 2))))) + (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r") + (minus:<DWI> (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" + "#" + "reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (minus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (reg:CCO FLAGS_REG) + (eq:CCO + (minus:<DWI> + (minus:<DWI> + (sign_extend:<DWI> (match_dup 4)) + (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))) + (sign_extend:<DWI> (match_dup 5))) + (sign_extend:<DWI> + (minus:DWIH + (minus:DWIH + (match_dup 4) + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) + (match_dup 5))))) + (set (match_dup 3) + (minus:DWIH + (minus:DWIH + (match_dup 4) + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) + (match_dup 5)))])] +{ + split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); +}) + +(define_insn_and_split "*subv<dwi>4_doubleword_1" + [(set (reg:CCO FLAGS_REG) + (eq:CCO + (minus:<QWI> + (sign_extend:<QWI> + (match_operand:<DWI> 1 "nonimmediate_operand" "0")) + (match_operand:<QWI> 3 "const_scalar_int_operand" "")) + (sign_extend:<QWI> + (minus:<DWI> + (match_dup 1) + (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>"))))) + (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro") + (minus:<DWI> (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands) + && CONST_SCALAR_INT_P (operands[2]) + && rtx_equal_p (operands[2], operands[3])" + "#" + "reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (minus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (reg:CCO FLAGS_REG) + (eq:CCO + (minus:<DWI> + (minus:<DWI> + (sign_extend:<DWI> (match_dup 4)) + (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))) + (match_dup 5)) + (sign_extend:<DWI> + (minus:DWIH + (minus:DWIH + (match_dup 4) + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) + (match_dup 5))))) + (set (match_dup 3) + (minus:DWIH + (minus:DWIH + (match_dup 4) + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) + (match_dup 5)))])] +{ + split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); + if (operands[2] == const0_rtx) + { + emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5], + operands[5])); + DONE; + } +}) + +(define_insn "*subv<mode>4_overflow_1" + [(set (reg:CCO FLAGS_REG) + (eq:CCO + (minus:<DWI> + (minus:<DWI> + (sign_extend:<DWI> + (match_operand:SWI 1 "nonimmediate_operand" "%0,0")) + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)])) + (sign_extend:<DWI> + (match_operand:SWI 2 "<general_sext_operand>" "rWe,m"))) + (sign_extend:<DWI> + (minus:SWI + (minus:SWI + (match_dup 1) + (match_operator:SWI 5 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)])) + (match_dup 2))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r") + (minus:SWI + (minus:SWI + (match_dup 1) + (match_op_dup 5 [(match_dup 3) (const_int 0)])) + (match_dup 2)))] + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" + "sbb{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>")]) + +(define_insn "*subv<mode>4_overflow_2" + [(set (reg:CCO FLAGS_REG) + (eq:CCO + (minus:<DWI> + (minus:<DWI> + (sign_extend:<DWI> + (match_operand:SWI 1 "nonimmediate_operand" "%0")) + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)])) + (match_operand:<DWI> 6 "const_int_operand" "")) + (sign_extend:<DWI> + (minus:SWI + (minus:SWI + (match_dup 1) + (match_operator:SWI 5 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)])) + (match_operand:SWI 2 "x86_64_immediate_operand" "e"))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm") + (minus:SWI + (minus:SWI + (match_dup 1) + (match_op_dup 5 [(match_dup 3) (const_int 0)])) + (match_dup 2)))] + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands) + && CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == INTVAL (operands[6])" + "sbb{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>") + (set (attr "length_immediate") + (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") + (const_string "1") + (const_string "4")))]) + (define_expand "usubv<mode>4" [(parallel [(set (reg:CC FLAGS_REG) (compare:CC --- gcc/testsuite/gcc.target/i386/pr93141-1.c.jj 2020-01-05 13:51:34.452627004 +0100 +++ gcc/testsuite/gcc.target/i386/pr93141-1.c 2020-01-07 10:45:36.213489037 +0100 @@ -2,15 +2,17 @@ /* { dg-do compile } */ /* { dg-options "-O2 -masm=att" } */ /* { dg-final { scan-assembler-not "cmp\[lq]\t" } } */ -/* { dg-final { scan-assembler-times "setc\t%" 3 } } */ -/* { dg-final { scan-assembler-times "seto\t%" 5 } } */ -/* { dg-final { scan-assembler-times "adc\[lq]\t" 5 } } */ +/* { dg-final { scan-assembler-times "setc\t%" 5 } } */ +/* { dg-final { scan-assembler-times "seto\t%" 7 } } */ +/* { dg-final { scan-assembler-times "adc\[lq]\t" 9 } } */ #ifdef __x86_64__ typedef unsigned __int128 U; +typedef unsigned long long HU; typedef signed __int128 S; #else typedef unsigned long long U; +typedef unsigned int HU; typedef signed long long S; #endif int o; @@ -81,3 +83,39 @@ garply (S x) | (S) 0xbeedead, &z); return z; } + +S +waldo (S x) +{ + S z; + o = __builtin_add_overflow (x, (S) ((((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2)) + | -(HU) 0xbeedead), &z); + return z; +} + +S +fred (S x) +{ + S z; + o = __builtin_add_overflow (x, (S) ((-(((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2))) + | -(HU) 0xbeedead), &z); + return z; +} + +U +plugh (U x) +{ + U z; + o = __builtin_add_overflow (x, (U) ((((U) 0xdeadbee) << (sizeof (U) * __CHAR_BIT__ / 2)) + | -(HU) 0xbeedead), &z); + return z; +} + +U +xyzzy (U x) +{ + U z; + o = __builtin_add_overflow (x, (U) ((-(((U) 0xdeadbee) << (sizeof (U) * __CHAR_BIT__ / 2))) + | -(HU) 0xbeedead), &z); + return z; +} --- gcc/testsuite/gcc.target/i386/pr93141-2.c.jj 2020-01-07 10:30:53.966831451 +0100 +++ gcc/testsuite/gcc.target/i386/pr93141-2.c 2020-01-07 10:43:27.602433611 +0100 @@ -0,0 +1,78 @@ +/* PR target/93141 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler-not "cmp\[lq]\t" } } */ +/* { dg-final { scan-assembler-not "adc\[lq]\t" } } */ +/* { dg-final { scan-assembler-times "seto\t%" 7 } } */ +/* { dg-final { scan-assembler-times "sbb\[lq]\t" 5 } } */ + +#ifdef __x86_64__ +typedef unsigned __int128 U; +typedef unsigned long long HU; +typedef signed __int128 S; +#else +typedef unsigned long long U; +typedef signed int HU; +typedef signed long long S; +#endif +int o; + +S +qux (S x, S y) +{ + S z; + o = __builtin_sub_overflow (x, y, &z); + return z; +} + +S +quux (S x) +{ + S z; + o = __builtin_sub_overflow (x, ((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2), &z); + return z; +} + +S +corge (S x) +{ + S z; + o = __builtin_sub_overflow (x, (((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2)) + | (S) 0xbeedead, &z); + return z; +} + +S +grault (S x) +{ + S z; + o = __builtin_sub_overflow (x, -((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2), &z); + return z; +} + +S +garply (S x) +{ + S z; + o = __builtin_sub_overflow (x, (-(((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2))) + | (S) 0xbeedead, &z); + return z; +} + +S +waldo (S x) +{ + S z; + o = __builtin_sub_overflow (x, (S) ((((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2)) + | -(HU) 0xbeedead), &z); + return z; +} + +S +fred (S x) +{ + S z; + o = __builtin_sub_overflow (x, (S) ((-(((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2))) + | -(HU) 0xbeedead), &z); + return z; +} Jakub