Hi!

This is very similar to the previous PR93141 addv<mode>4 half and
improves signed __builtin_sub_overflow on double-words rather than
__builtin_add_overflow.

I have left out the uaddv<mode>4 double-word stuff, because I ran into
issues with it - as the pattern uses (set (reg:CC flags) (compare:CC (reg:TI) 
(reg:TI)))
as the first set, ifcvt.c (canonicalize_condition it calls) extracts
the TImode comparison arguments from it, but as we don't have a cmpti3
pattern, noce_try_store_flag fails and we don't manage to convert branchy
code into setc.  Even for smaller modes ifcvt in that case does weird
things, after the sub instruction which sets the flags too it emits
a cmp with the same arguments and only postreload we notice the compare is
redundant and remove it.  So, dunno if we want to improve somehow ifcvt
and for conditions where we'd look through a multiple set instructions
somehow try harder to reuse the original instruction, or if usub<dwi>4
should not use compare:CC of the operands and instead use some obfuscation
to only set CCCmode to prevent ifcvt from punting on it.

Anyway, this part works, bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk?

2020-01-07  Jakub Jelinek  <ja...@redhat.com>

        PR target/93141
        (subv<mode>4): Use SWIDWI iterator instead of SWI.  Use
        <general_hilo_operand> instead of <general_operand>.  Use
        CONST_SCALAR_INT_P instead of CONST_INT_P.
        (*subv<mode>4_1): Rename to ...
        (subv<mode>4_1): ... this.
        (*subv<dwi>4_doubleword, *addv<dwi>4_doubleword_1): New
        define_insn_and_split patterns.
        (*subv<mode>4_overflow_1, *addv<mode>4_overflow_2): New define_insn
        patterns.

        * gcc.target/i386/pr93141-1.c: Add tests with constants that have MSB
        of the low half of the constant set.
        * gcc.target/i386/pr93141-2.c: New test.

--- gcc/config/i386/i386.md.jj  2020-01-07 08:01:22.618613702 +0100
+++ gcc/config/i386/i386.md     2020-01-07 10:07:01.770587027 +0100
@@ -6569,16 +6569,17 @@ (define_insn "*subsi_2_zext"
 ;; Subtract with jump on overflow.
 (define_expand "subv<mode>4"
   [(parallel [(set (reg:CCO FLAGS_REG)
-                  (eq:CCO (minus:<DWI>
-                             (sign_extend:<DWI>
-                                (match_operand:SWI 1 "nonimmediate_operand"))
-                             (match_dup 4))
-                          (sign_extend:<DWI>
-                             (minus:SWI (match_dup 1)
-                                        (match_operand:SWI 2
-                                           "<general_operand>")))))
-             (set (match_operand:SWI 0 "register_operand")
-                  (minus:SWI (match_dup 1) (match_dup 2)))])
+                  (eq:CCO
+                    (minus:<DWI>
+                      (sign_extend:<DWI>
+                        (match_operand:SWIDWI 1 "nonimmediate_operand"))
+                      (match_dup 4))
+                    (sign_extend:<DWI>
+                      (minus:SWIDWI (match_dup 1)
+                                    (match_operand:SWIDWI 2
+                                               "<general_hilo_operand>")))))
+             (set (match_operand:SWIDWI 0 "register_operand")
+                  (minus:SWIDWI (match_dup 1) (match_dup 2)))])
    (set (pc) (if_then_else
               (eq (reg:CCO FLAGS_REG) (const_int 0))
               (label_ref (match_operand 3))
@@ -6586,7 +6587,7 @@ (define_expand "subv<mode>4"
   ""
 {
   ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
-  if (CONST_INT_P (operands[2]))
+  if (CONST_SCALAR_INT_P (operands[2]))
     operands[4] = operands[2];
   else
     operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]);
@@ -6608,7 +6609,7 @@ (define_insn "*subv<mode>4"
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*subv<mode>4_1"
+(define_insn "subv<mode>4_1"
   [(set (reg:CCO FLAGS_REG)
        (eq:CCO (minus:<DWI>
                   (sign_extend:<DWI>
@@ -6633,6 +6634,162 @@ (define_insn "*subv<mode>4_1"
                  (const_string "4")]
              (const_string "<MODE_SIZE>")))])
 
+(define_insn_and_split "*subv<dwi>4_doubleword"
+  [(set (reg:CCO FLAGS_REG)
+       (eq:CCO
+         (minus:<QWI>
+           (sign_extend:<QWI>
+             (match_operand:<DWI> 1 "nonimmediate_operand" "0,0"))
+           (sign_extend:<QWI>
+             (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
+         (sign_extend:<QWI>
+           (minus:<DWI> (match_dup 1) (match_dup 2)))))
+   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
+       (minus:<DWI> (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+  "#"
+  "reload_completed"
+  [(parallel [(set (reg:CC FLAGS_REG)
+                  (compare:CC (match_dup 1) (match_dup 2)))
+             (set (match_dup 0)
+                  (minus:DWIH (match_dup 1) (match_dup 2)))])
+   (parallel [(set (reg:CCO FLAGS_REG)
+                  (eq:CCO
+                    (minus:<DWI>
+                      (minus:<DWI>
+                        (sign_extend:<DWI> (match_dup 4))
+                        (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
+                      (sign_extend:<DWI> (match_dup 5)))
+                    (sign_extend:<DWI>
+                      (minus:DWIH
+                        (minus:DWIH
+                          (match_dup 4)
+                          (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
+                        (match_dup 5)))))
+             (set (match_dup 3)
+                  (minus:DWIH
+                    (minus:DWIH
+                      (match_dup 4)
+                      (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
+                    (match_dup 5)))])]
+{
+  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
+})
+
+(define_insn_and_split "*subv<dwi>4_doubleword_1"
+  [(set (reg:CCO FLAGS_REG)
+       (eq:CCO
+         (minus:<QWI>
+           (sign_extend:<QWI>
+             (match_operand:<DWI> 1 "nonimmediate_operand" "0"))
+           (match_operand:<QWI> 3 "const_scalar_int_operand" ""))
+         (sign_extend:<QWI>
+           (minus:<DWI>
+             (match_dup 1)
+             (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>")))))
+   (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+       (minus:<DWI> (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+   && CONST_SCALAR_INT_P (operands[2])
+   && rtx_equal_p (operands[2], operands[3])"
+  "#"
+  "reload_completed"
+  [(parallel [(set (reg:CC FLAGS_REG)
+                  (compare:CC (match_dup 1) (match_dup 2)))
+             (set (match_dup 0)
+                  (minus:DWIH (match_dup 1) (match_dup 2)))])
+   (parallel [(set (reg:CCO FLAGS_REG)
+                  (eq:CCO
+                    (minus:<DWI>
+                      (minus:<DWI>
+                        (sign_extend:<DWI> (match_dup 4))
+                        (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
+                      (match_dup 5))
+                    (sign_extend:<DWI>
+                      (minus:DWIH
+                        (minus:DWIH
+                          (match_dup 4)
+                          (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
+                        (match_dup 5)))))
+             (set (match_dup 3)
+                  (minus:DWIH
+                    (minus:DWIH
+                      (match_dup 4)
+                      (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
+                    (match_dup 5)))])]
+{
+  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
+  if (operands[2] == const0_rtx)
+    {
+      emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5],
+                                   operands[5]));
+      DONE;
+    }
+})
+
+(define_insn "*subv<mode>4_overflow_1"
+  [(set (reg:CCO FLAGS_REG)
+       (eq:CCO
+         (minus:<DWI>
+           (minus:<DWI>
+             (sign_extend:<DWI>
+               (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
+             (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+               [(match_operand 3 "flags_reg_operand") (const_int 0)]))
+           (sign_extend:<DWI>
+             (match_operand:SWI 2 "<general_sext_operand>" "rWe,m")))
+         (sign_extend:<DWI>
+           (minus:SWI
+             (minus:SWI
+               (match_dup 1)
+               (match_operator:SWI 5 "ix86_carry_flag_operator"
+                 [(match_dup 3) (const_int 0)]))
+             (match_dup 2)))))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
+       (minus:SWI
+         (minus:SWI
+           (match_dup 1)
+           (match_op_dup 5 [(match_dup 3) (const_int 0)]))
+         (match_dup 2)))]
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*subv<mode>4_overflow_2"
+  [(set (reg:CCO FLAGS_REG)
+       (eq:CCO
+         (minus:<DWI>
+           (minus:<DWI>
+             (sign_extend:<DWI>
+               (match_operand:SWI 1 "nonimmediate_operand" "%0"))
+             (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+               [(match_operand 3 "flags_reg_operand") (const_int 0)]))
+           (match_operand:<DWI> 6 "const_int_operand" ""))
+         (sign_extend:<DWI>
+           (minus:SWI
+             (minus:SWI
+               (match_dup 1)
+               (match_operator:SWI 5 "ix86_carry_flag_operator"
+                 [(match_dup 3) (const_int 0)]))
+             (match_operand:SWI 2 "x86_64_immediate_operand" "e")))))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=rm")
+       (minus:SWI
+         (minus:SWI
+           (match_dup 1)
+           (match_op_dup 5 [(match_dup 3) (const_int 0)]))
+         (match_dup 2)))]
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+   && CONST_INT_P (operands[2])
+   && INTVAL (operands[2]) == INTVAL (operands[6])"
+  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")
+   (set (attr "length_immediate")
+     (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
+       (const_string "1")
+       (const_string "4")))])
+
 (define_expand "usubv<mode>4"
   [(parallel [(set (reg:CC FLAGS_REG)
                   (compare:CC
--- gcc/testsuite/gcc.target/i386/pr93141-1.c.jj        2020-01-05 
13:51:34.452627004 +0100
+++ gcc/testsuite/gcc.target/i386/pr93141-1.c   2020-01-07 10:45:36.213489037 
+0100
@@ -2,15 +2,17 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -masm=att" } */
 /* { dg-final { scan-assembler-not "cmp\[lq]\t" } } */
-/* { dg-final { scan-assembler-times "setc\t%" 3 } } */
-/* { dg-final { scan-assembler-times "seto\t%" 5 } } */
-/* { dg-final { scan-assembler-times "adc\[lq]\t" 5 } } */
+/* { dg-final { scan-assembler-times "setc\t%" 5 } } */
+/* { dg-final { scan-assembler-times "seto\t%" 7 } } */
+/* { dg-final { scan-assembler-times "adc\[lq]\t" 9 } } */
 
 #ifdef __x86_64__
 typedef unsigned __int128 U;
+typedef unsigned long long HU;
 typedef signed __int128 S;
 #else
 typedef unsigned long long U;
+typedef unsigned int HU;
 typedef signed long long S;
 #endif
 int o;
@@ -81,3 +83,39 @@ garply (S x)
                                 | (S) 0xbeedead, &z);
   return z;
 }
+
+S
+waldo (S x)
+{
+  S z;
+  o = __builtin_add_overflow (x, (S) ((((S) 0xdeadbee) << (sizeof (S) * 
__CHAR_BIT__ / 2))
+                                     | -(HU) 0xbeedead), &z);
+  return z;
+}
+
+S
+fred (S x)
+{
+  S z;
+  o = __builtin_add_overflow (x, (S) ((-(((S) 0xdeadbee) << (sizeof (S) * 
__CHAR_BIT__ / 2)))
+                                     | -(HU) 0xbeedead), &z);
+  return z;
+}
+
+U
+plugh (U x)
+{
+  U z;
+  o = __builtin_add_overflow (x, (U) ((((U) 0xdeadbee) << (sizeof (U) * 
__CHAR_BIT__ / 2))
+                                     | -(HU) 0xbeedead), &z);
+  return z;
+}
+
+U
+xyzzy (U x)
+{
+  U z;
+  o = __builtin_add_overflow (x, (U) ((-(((U) 0xdeadbee) << (sizeof (U) * 
__CHAR_BIT__ / 2)))
+                                     | -(HU) 0xbeedead), &z);
+  return z;
+}
--- gcc/testsuite/gcc.target/i386/pr93141-2.c.jj        2020-01-07 
10:30:53.966831451 +0100
+++ gcc/testsuite/gcc.target/i386/pr93141-2.c   2020-01-07 10:43:27.602433611 
+0100
@@ -0,0 +1,78 @@
+/* PR target/93141 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler-not "cmp\[lq]\t" } } */
+/* { dg-final { scan-assembler-not "adc\[lq]\t" } } */
+/* { dg-final { scan-assembler-times "seto\t%" 7 } } */
+/* { dg-final { scan-assembler-times "sbb\[lq]\t" 5 } } */
+
+#ifdef __x86_64__
+typedef unsigned __int128 U;
+typedef unsigned long long HU;
+typedef signed __int128 S;
+#else
+typedef unsigned long long U;
+typedef signed int HU;
+typedef signed long long S;
+#endif
+int o;
+
+S
+qux (S x, S y)
+{
+  S z;
+  o = __builtin_sub_overflow (x, y, &z);
+  return z;
+}
+
+S
+quux (S x)
+{
+  S z;
+  o = __builtin_sub_overflow (x, ((S) 0xdeadbee) << (sizeof (S) * __CHAR_BIT__ 
/ 2), &z);
+  return z;
+}
+
+S
+corge (S x)
+{
+  S z;
+  o = __builtin_sub_overflow (x, (((S) 0xdeadbee) << (sizeof (S) * 
__CHAR_BIT__ / 2))
+                                | (S) 0xbeedead, &z);
+  return z;
+}
+
+S
+grault (S x)
+{
+  S z;
+  o = __builtin_sub_overflow (x, -((S) 0xdeadbee) << (sizeof (S) * 
__CHAR_BIT__ / 2), &z);
+  return z;
+}
+
+S
+garply (S x)
+{
+  S z;
+  o = __builtin_sub_overflow (x, (-(((S) 0xdeadbee) << (sizeof (S) * 
__CHAR_BIT__ / 2)))
+                                | (S) 0xbeedead, &z);
+  return z;
+}
+
+S
+waldo (S x)
+{
+  S z;
+  o = __builtin_sub_overflow (x, (S) ((((S) 0xdeadbee) << (sizeof (S) * 
__CHAR_BIT__ / 2))
+                                     | -(HU) 0xbeedead), &z);
+  return z;
+}
+
+S
+fred (S x)
+{
+  S z;
+  o = __builtin_sub_overflow (x, (S) ((-(((S) 0xdeadbee) << (sizeof (S) * 
__CHAR_BIT__ / 2)))
+                                     | -(HU) 0xbeedead), &z);
+  return z;
+}

        Jakub

Reply via email to