Hi! As the following testcase shows, we generate quite bad code for double-word __builtin_add_overflow on x86, we have add[dt]i3_doubleword pattern and emit add[ql]; adc[ql];, but then we could just use setc/seto or adc etc., but we instead perform a double-word comparison to set compute the carry from the addition (or overflow). The following patch on the testcase results in the attached changes for -m64 (and -m32). Also, when the middle-end notices the optabs, it pattern recognizes hand-written code, like e.g. in the #c0 testcase in the PR where it does sum += prod; overflow += sum < prod; which the GIMPLE opts can turn into overflow += __builtin_add_overflow (sum, prod, &sum);.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Haven't done yet subv<mode>4 and usubv<mode>4 for the double-word modes, but it ought to work similarly, just it will be more work. 2020-01-04 Jakub Jelinek <ja...@redhat.com> PR target/93141 * config/i386/i386.md (SWIDWI): New mode iterator. (DWI, dwi): Add TImode variants. (addv<mode>4): Use SWIDWI iterator instead of SWI. Use <general_hilo_operand> instead of <general_operand>. Use CONST_SCALAR_INT_P instead of CONST_INT_P. (*addv<mode>4_1): Rename to ... (addv<mode>4_1): ... this. (QWI): New mode attribute. (*addv<dwi>4_doubleword, *addv<dwi>4_doubleword_1): New define_insn_and_split patterns. (*addv<mode>4_overflow_1, *addv<mode>4_overflow_2): New define_insn patterns. (uaddv<mode>4): Use SWIDWI iterator instead of SWI. Use <general_hilo_operand> instead of <general_operand>. (*addcarry<mode>_1): New define_insn. (*add<dwi>3_doubleword_cc_overflow_1): New define_insn_and_split. * gcc.target/i386/pr93141-1.c: New test. * gcc.dg/pr67089-6.c: Expect 16 ADD_OVERFLOW calls even on ia32. --- gcc/config/i386/i386.md.jj 2020-01-03 11:10:43.839511446 +0100 +++ gcc/config/i386/i386.md 2020-01-03 20:55:39.152577178 +0100 @@ -1036,6 +1036,9 @@ (define_mode_iterator SWIM248 [(HI "TARG (define_mode_iterator DWI [(DI "!TARGET_64BIT") (TI "TARGET_64BIT")]) +;; SWI and DWI together. +(define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")]) + ;; GET_MODE_SIZE for selected modes. As GET_MODE_SIZE is not ;; compile time constant, it is faster to use <MODE_SIZE> than ;; GET_MODE_SIZE (<MODE>mode). For XFmode which depends on @@ -1051,8 +1054,8 @@ (define_mode_attr MODE_SIZE [(QI "1") (H (V4SF "16") (V8SF "32") (V16SF "64")]) ;; Double word integer modes as mode attribute. -(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")]) -(define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti")]) +(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")]) +(define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti") (TI "oi")]) ;; LEA mode corresponding to an integer mode (define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")]) @@ -6054,16 +6057,17 @@ (define_insn "*addqi_ext_2" ;; Add with jump on overflow. (define_expand "addv<mode>4" [(parallel [(set (reg:CCO FLAGS_REG) - (eq:CCO (plus:<DWI> - (sign_extend:<DWI> - (match_operand:SWI 1 "nonimmediate_operand")) - (match_dup 4)) - (sign_extend:<DWI> - (plus:SWI (match_dup 1) - (match_operand:SWI 2 - "<general_operand>"))))) - (set (match_operand:SWI 0 "register_operand") - (plus:SWI (match_dup 1) (match_dup 2)))]) + (eq:CCO + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:SWIDWI 1 "nonimmediate_operand")) + (match_dup 4)) + (sign_extend:<DWI> + (plus:SWIDWI (match_dup 1) + (match_operand:SWIDWI 2 + "<general_hilo_operand>"))))) + (set (match_operand:SWIDWI 0 "register_operand") + (plus:SWIDWI (match_dup 1) (match_dup 2)))]) (set (pc) (if_then_else (eq (reg:CCO FLAGS_REG) (const_int 0)) (label_ref (match_operand 3)) @@ -6071,7 +6075,7 @@ (define_expand "addv<mode>4" "" { ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands); - if (CONST_INT_P (operands[2])) + if (CONST_SCALAR_INT_P (operands[2])) operands[4] = operands[2]; else operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]); @@ -6093,7 +6097,7 @@ (define_insn "*addv<mode>4" [(set_attr "type" "alu") (set_attr "mode" "<MODE>")]) -(define_insn "*addv<mode>4_1" +(define_insn "addv<mode>4_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (plus:<DWI> (sign_extend:<DWI> @@ -6118,15 +6122,178 @@ (define_insn "*addv<mode>4_1" (const_string "4")] (const_string "<MODE_SIZE>")))]) +;; Quad word integer modes as mode attribute. +(define_mode_attr QWI [(SI "TI") (DI "OI")]) + +(define_insn_and_split "*addv<dwi>4_doubleword" + [(set (reg:CCO FLAGS_REG) + (eq:CCO + (plus:<QWI> + (sign_extend:<QWI> + (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")) + (sign_extend:<QWI> + (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o"))) + (sign_extend:<QWI> + (plus:<DWI> (match_dup 1) (match_dup 2))))) + (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r") + (plus:<DWI> (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)" + "#" + "reload_completed" + [(parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:DWIH (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) + (plus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (reg:CCO FLAGS_REG) + (eq:CCO + (plus:<DWI> + (plus:<DWI> + (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)) + (sign_extend:<DWI> (match_dup 4))) + (sign_extend:<DWI> (match_dup 5))) + (sign_extend:<DWI> + (plus:DWIH + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5))))) + (set (match_dup 3) + (plus:DWIH + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5)))])] +{ + split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); +}) + +(define_insn_and_split "*addv<dwi>4_doubleword_1" + [(set (reg:CCO FLAGS_REG) + (eq:CCO + (plus:<QWI> + (sign_extend:<QWI> + (match_operand:<DWI> 1 "nonimmediate_operand" "%0")) + (match_operand:<QWI> 3 "const_scalar_int_operand" "")) + (sign_extend:<QWI> + (plus:<DWI> + (match_dup 1) + (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>"))))) + (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro") + (plus:<DWI> (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, <DWI>mode, operands) + && CONST_SCALAR_INT_P (operands[2]) + && rtx_equal_p (operands[2], operands[3])" + "#" + "reload_completed" + [(parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:DWIH (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) + (plus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (reg:CCO FLAGS_REG) + (eq:CCO + (plus:<DWI> + (plus:<DWI> + (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)) + (sign_extend:<DWI> (match_dup 4))) + (match_dup 5)) + (sign_extend:<DWI> + (plus:DWIH + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5))))) + (set (match_dup 3) + (plus:DWIH + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5)))])] +{ + split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); + if (operands[2] == const0_rtx) + { + emit_insn (gen_addv<mode>4_1 (operands[3], operands[4], operands[5], + operands[5])); + DONE; + } +}) + +(define_insn "*addv<mode>4_overflow_1" + [(set (reg:CCO FLAGS_REG) + (eq:CCO + (plus:<DWI> + (plus:<DWI> + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (sign_extend:<DWI> + (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))) + (sign_extend:<DWI> + (match_operand:SWI 2 "<general_sext_operand>" "rWe,m"))) + (sign_extend:<DWI> + (plus:SWI + (plus:SWI + (match_operator:SWI 5 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r") + (plus:SWI + (plus:SWI + (match_op_dup 5 [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" + "adc{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>")]) + +(define_insn "*addv<mode>4_overflow_2" + [(set (reg:CCO FLAGS_REG) + (eq:CCO + (plus:<DWI> + (plus:<DWI> + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (sign_extend:<DWI> + (match_operand:SWI 1 "nonimmediate_operand" "%0"))) + (match_operand:<DWI> 6 "const_int_operand" "")) + (sign_extend:<DWI> + (plus:SWI + (plus:SWI + (match_operator:SWI 5 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_operand:SWI 2 "x86_64_immediate_operand" "e"))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm") + (plus:SWI + (plus:SWI + (match_op_dup 5 [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands) + && CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == INTVAL (operands[6])" + "adc{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>") + (set (attr "length_immediate") + (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") + (const_string "1") + (const_string "4")))]) + (define_expand "uaddv<mode>4" [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC - (plus:SWI - (match_operand:SWI 1 "nonimmediate_operand") - (match_operand:SWI 2 "<general_operand>")) + (plus:SWIDWI + (match_operand:SWIDWI 1 "nonimmediate_operand") + (match_operand:SWIDWI 2 "<general_hilo_operand>")) (match_dup 1))) - (set (match_operand:SWI 0 "register_operand") - (plus:SWI (match_dup 1) (match_dup 2)))]) + (set (match_operand:SWIDWI 0 "register_operand") + (plus:SWIDWI (match_dup 1) (match_dup 2)))]) (set (pc) (if_then_else (ltu (reg:CCC FLAGS_REG) (const_int 0)) (label_ref (match_operand 3)) @@ -6649,6 +6816,48 @@ (define_expand "addcarry<mode>_0" (plus:SWI48 (match_dup 1) (match_dup 2)))])] "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)") +(define_insn "*addcarry<mode>_1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI48 1 "nonimmediate_operand" "%0")) + (match_operand:SWI48 2 "x86_64_immediate_operand" "e"))) + (plus:<DWI> + (match_operand:<DWI> 6 "const_scalar_int_operand" "") + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)])))) + (set (match_operand:SWI48 0 "register_operand" "=r") + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands) + && CONST_INT_P (operands[2]) + /* Check that operands[6] is operands[2] zero extended from + <MODE>mode to <DWI>mode. */ + && ((<MODE>mode == SImode || INTVAL (operands[2]) >= 0) + ? (CONST_INT_P (operands[6]) + && UINTVAL (operands[6]) == (UINTVAL (operands[2]) + & GET_MODE_MASK (<MODE>mode))) + : (CONST_WIDE_INT_P (operands[6]) + && CONST_WIDE_INT_NUNITS (operands[6]) == 2 + && ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0) + == UINTVAL (operands[2])) + && CONST_WIDE_INT_ELT (operands[6], 1) == 0))" + "adc{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "<MODE>") + (set (attr "length_immediate") + (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") + (const_string "1") + (const_string "4")))]) + (define_insn "@sub<mode>3_carry" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") (minus:SWI @@ -6885,6 +7094,54 @@ (define_insn "*addsi3_zext_cc_overflow_2 [(set_attr "type" "alu") (set_attr "mode" "SI")]) +(define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:<DWI> + (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0") + (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")) + (match_dup 1))) + (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r") + (plus:<DWI> (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)" + "#" + "reload_completed" + [(parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:DWIH (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) + (plus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:DWIH + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5))) + (plus:<DWI> + (match_dup 6) + (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))))) + (set (match_dup 3) + (plus:DWIH + (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5)))])] +{ + split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); + if (operands[2] == const0_rtx) + { + emit_insn (gen_addcarry<mode>_0 (operands[3], operands[4], operands[5])); + DONE; + } + if (CONST_INT_P (operands[5])) + operands[6] = simplify_unary_operation (ZERO_EXTEND, <DWI>mode, + operands[5], <MODE>mode); + else + operands[6] = gen_rtx_ZERO_EXTEND (<DWI>mode, operands[5]); +}) + ;; x == 0 with zero flag test can be done also as x < 1U with carry flag ;; test, where the latter is preferrable if we have some carry consuming ;; instruction. --- gcc/testsuite/gcc.target/i386/pr93141-1.c.jj 2020-01-03 20:59:02.053542989 +0100 +++ gcc/testsuite/gcc.target/i386/pr93141-1.c 2020-01-03 21:02:33.573379636 +0100 @@ -0,0 +1,83 @@ +/* PR target/93141 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler-not "cmp\[lq]\t" } } */ +/* { dg-final { scan-assembler-times "setc\t%" 3 } } */ +/* { dg-final { scan-assembler-times "seto\t%" 5 } } */ +/* { dg-final { scan-assembler-times "adc\[lq]\t" 5 } } */ + +#ifdef __x86_64__ +typedef unsigned __int128 U; +typedef signed __int128 S; +#else +typedef unsigned long long U; +typedef signed long long S; +#endif +int o; + +U +foo (U x, U y) +{ + U z; + o = __builtin_add_overflow (x, y, &z); + return z; +} + +U +bar (U x) +{ + U z; + o = __builtin_add_overflow (x, ((U) 0xdeadbee) << (sizeof (U) * __CHAR_BIT__ / 2), &z); + return z; +} + +U +baz (U x) +{ + U z; + o = __builtin_add_overflow (x, (((U) 0xdeadbee) << (sizeof (U) * __CHAR_BIT__ / 2)) + | (U) 0xbeedead, &z); + return z; +} + +S +qux (S x, S y) +{ + S z; + o = __builtin_add_overflow (x, y, &z); + return z; +} + +S +quux (S x) +{ + S z; + o = __builtin_add_overflow (x, ((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2), &z); + return z; +} + +S +corge (S x) +{ + S z; + o = __builtin_add_overflow (x, (((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2)) + | (S) 0xbeedead, &z); + return z; +} + +S +grault (S x) +{ + S z; + o = __builtin_add_overflow (x, -((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2), &z); + return z; +} + +S +garply (S x) +{ + S z; + o = __builtin_add_overflow (x, (-(((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ / 2))) + | (S) 0xbeedead, &z); + return z; +} --- gcc/testsuite/gcc.dg/pr67089-6.c.jj 2015-11-25 09:57:47.986410019 +0100 +++ gcc/testsuite/gcc.dg/pr67089-6.c 2020-01-04 00:24:48.944285746 +0100 @@ -56,7 +56,6 @@ T (24, unsigned long long, x + y, if (d T (25, unsigned short, 2U - x, if (r > 2U) foo (0)) T (26, unsigned char, 2U - x, if (r <= 2U) foo (0)) -/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 16 "widening_mul" { target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */ +/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 16 "widening_mul" { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 11 "widening_mul" { target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */ -/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 12 "widening_mul" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ /* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 9 "widening_mul" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ Jakub
--- pr93141-1.s 2020-01-03 20:46:46.090542214 +0100 +++ pr93141-1.s 2020-01-03 20:56:46.718566962 +0100 @@ -12,9 +12,6 @@ foo: movq %rcx, %rdx addq %r9, %rax adcq %r8, %rdx - cmpq %r9, %rax - movq %rdx, %rcx - sbbq %r8, %rcx setc %cl movzbl %cl, %ecx movl %ecx, o(%rip) @@ -32,8 +29,7 @@ bar: xorl %ecx, %ecx movq %rdi, %rax addq $233495534, %rdx - cmpq $233495533, %rdx - setbe %cl + setc %cl movl %ecx, o(%rip) ret .cfi_endproc @@ -49,9 +45,6 @@ baz: movq %rsi, %rdx addq $200203949, %rax adcq $233495534, %rdx - cmpq $200203949, %rax - movq %rdx, %rcx - sbbq $233495534, %rcx setc %cl movzbl %cl, %ecx movl %ecx, o(%rip) @@ -65,22 +58,15 @@ baz: qux: .LFB3: .cfi_startproc - movq %rdi, %r8 - movq %rsi, %r11 - movq %rcx, %rdi - movq %r8, %r10 - addq %rdx, %r10 - movq %rdi, %rdx - adcq %rcx, %r11 - movq %rsi, %rcx - movq %r10, %rax - xorq %rdi, %rcx - xorq %r11, %rdx - notq %rcx - andq %rcx, %rdx - shrq $63, %rdx - movl %edx, o(%rip) - movq %r11, %rdx + movq %rdi, %r9 + movq %rdx, %rax + movq %rsi, %r8 + movq %rcx, %rdx + addq %r9, %rax + adcq %r8, %rdx + seto %cl + movzbl %cl, %ecx + movl %ecx, o(%rip) ret .cfi_endproc .LFE3: @@ -91,16 +77,11 @@ qux: quux: .LFB4: .cfi_startproc - movq %rsi, %r8 - movq %rdi, %r9 - movq %r8, %rdx - movq %r9, %rax + movq %rsi, %rdx + xorl %ecx, %ecx + movq %rdi, %rax addq $233495534, %rdx - cmpq %r9, %r9 - movq %rdx, %rcx - sbbq %r8, %rcx - setl %cl - movzbl %cl, %ecx + seto %cl movl %ecx, o(%rip) ret .cfi_endproc @@ -112,16 +93,11 @@ quux: corge: .LFB5: .cfi_startproc - movq %rdi, %r9 - movq %rsi, %r8 - movq %r9, %rax - movq %r8, %rdx + movq %rdi, %rax + movq %rsi, %rdx addq $200203949, %rax adcq $233495534, %rdx - cmpq %r9, %rax - movq %rdx, %rcx - sbbq %r8, %rcx - setl %cl + seto %cl movzbl %cl, %ecx movl %ecx, o(%rip) ret @@ -134,15 +110,11 @@ corge: grault: .LFB6: .cfi_startproc - movq %rdi, %r9 movq %rsi, %rdx - movq %rsi, %rdi xorl %ecx, %ecx - subq $233495534, %rdx - cmpq %r9, %r9 - movq %r9, %rax - sbbq %rdx, %rdi - setl %cl + movq %rdi, %rax + addq $-233495534, %rdx + seto %cl movl %ecx, o(%rip) ret .cfi_endproc @@ -154,16 +126,12 @@ grault: garply: .LFB7: .cfi_startproc - movq %rdi, %r9 + movq %rdi, %rax movq %rsi, %rdx - movq %rsi, %rdi - movq %r9, %rax addq $200203949, %rax adcq $-233495534, %rdx - xorl %ecx, %ecx - cmpq %rax, %r9 - sbbq %rdx, %rdi - setl %cl + seto %cl + movzbl %cl, %ecx movl %ecx, o(%rip) ret .cfi_endproc
--- pr93141-1.s 2020-01-03 20:47:57.962468300 +0100 +++ pr93141-1.s 2020-01-03 20:56:37.624702968 +0100 @@ -6,27 +6,10 @@ foo: .LFB0: .cfi_startproc - pushl %esi - .cfi_def_cfa_offset 8 - .cfi_offset 6, -8 - pushl %ebx - .cfi_def_cfa_offset 12 - .cfi_offset 3, -12 - movl 12(%esp), %ecx - movl 16(%esp), %ebx - movl %ecx, %eax - addl 20(%esp), %eax - movl %ebx, %edx - adcl 24(%esp), %edx - cmpl %ecx, %eax - movl %edx, %esi - sbbl %ebx, %esi - popl %ebx - .cfi_restore 3 - .cfi_def_cfa_offset 8 - popl %esi - .cfi_restore 6 - .cfi_def_cfa_offset 4 + movl 12(%esp), %eax + movl 16(%esp), %edx + addl 4(%esp), %eax + adcl 8(%esp), %edx setc %cl movzbl %cl, %ecx movl %ecx, o @@ -60,9 +43,6 @@ baz: movl 8(%esp), %edx addl $200203949, %eax adcl $233495534, %edx - cmpl $200203949, %eax - movl %edx, %ecx - sbbl $233495534, %ecx setc %cl movzbl %cl, %ecx movl %ecx, o @@ -76,26 +56,13 @@ baz: qux: .LFB3: .cfi_startproc - pushl %ebx - .cfi_def_cfa_offset 8 - .cfi_offset 3, -8 - movl 20(%esp), %edx - movl 8(%esp), %ecx - movl 12(%esp), %ebx - addl 16(%esp), %ecx movl 12(%esp), %eax - adcl 20(%esp), %ebx - xorl 20(%esp), %eax - xorl %ebx, %edx - notl %eax - andl %eax, %edx - movl %ecx, %eax - shrl $31, %edx - movl %edx, o - movl %ebx, %edx - popl %ebx - .cfi_restore 3 - .cfi_def_cfa_offset 4 + movl 16(%esp), %edx + addl 4(%esp), %eax + adcl 8(%esp), %edx + seto %cl + movzbl %cl, %ecx + movl %ecx, o ret .cfi_endproc .LFE3: @@ -106,28 +73,11 @@ qux: quux: .LFB4: .cfi_startproc - pushl %esi - .cfi_def_cfa_offset 8 - .cfi_offset 6, -8 - pushl %ebx - .cfi_def_cfa_offset 12 - .cfi_offset 3, -12 - movl 16(%esp), %ebx - movl 12(%esp), %ecx - movl %ebx, %edx + movl 8(%esp), %edx + xorl %ecx, %ecx + movl 4(%esp), %eax addl $233495534, %edx - cmpl %ecx, %ecx - movl %ecx, %eax - movl %edx, %esi - sbbl %ebx, %esi - popl %ebx - .cfi_restore 3 - .cfi_def_cfa_offset 8 - popl %esi - .cfi_restore 6 - .cfi_def_cfa_offset 4 - setl %cl - movzbl %cl, %ecx + seto %cl movl %ecx, o ret .cfi_endproc @@ -139,28 +89,11 @@ quux: corge: .LFB5: .cfi_startproc - pushl %esi - .cfi_def_cfa_offset 8 - .cfi_offset 6, -8 - pushl %ebx - .cfi_def_cfa_offset 12 - .cfi_offset 3, -12 - movl 12(%esp), %ecx - movl 16(%esp), %ebx - movl %ecx, %eax + movl 4(%esp), %eax + movl 8(%esp), %edx addl $200203949, %eax - movl %ebx, %edx adcl $233495534, %edx - cmpl %ecx, %eax - movl %edx, %esi - sbbl %ebx, %esi - popl %ebx - .cfi_restore 3 - .cfi_def_cfa_offset 8 - popl %esi - .cfi_restore 6 - .cfi_def_cfa_offset 4 - setl %cl + seto %cl movzbl %cl, %ecx movl %ecx, o ret @@ -173,21 +106,11 @@ corge: grault: .LFB6: .cfi_startproc - pushl %ebx - .cfi_def_cfa_offset 8 - .cfi_offset 3, -8 - movl 12(%esp), %ebx - movl 8(%esp), %ecx - movl %ebx, %edx - subl $233495534, %edx - cmpl %ecx, %ecx - movl %ecx, %eax - sbbl %edx, %ebx - popl %ebx - .cfi_restore 3 - .cfi_def_cfa_offset 4 - setl %cl - movzbl %cl, %ecx + movl 8(%esp), %edx + xorl %ecx, %ecx + movl 4(%esp), %eax + addl $-233495534, %edx + seto %cl movl %ecx, o ret .cfi_endproc @@ -199,21 +122,11 @@ grault: garply: .LFB7: .cfi_startproc - pushl %ebx - .cfi_def_cfa_offset 8 - .cfi_offset 3, -8 - movl 8(%esp), %ecx - movl 12(%esp), %ebx - movl %ecx, %eax - movl %ebx, %edx + movl 4(%esp), %eax + movl 8(%esp), %edx addl $200203949, %eax adcl $-233495534, %edx - cmpl %eax, %ecx - sbbl %edx, %ebx - popl %ebx - .cfi_restore 3 - .cfi_def_cfa_offset 4 - setl %cl + seto %cl movzbl %cl, %ecx movl %ecx, o ret