Hi!

This patch improves expansion of __builtin_mul_overflow for HImode, both
signed and unsigned, on x86_64/i686.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2015-05-12  Jakub Jelinek  <ja...@redhat.com>

        PR target/66112
        * config/i386/i386.md (mulv<mode>4, umulv<mode>4, *umulv<mode>4):
        Use SWI248 iterator instead of SWI48.
        (*mulv<mode>4_1): Use SWI48 instead of SWI.  Simplify output template.
        Use eq_attr "alternative" "0" instead of match_test in
        length_immediate attribute computation.
        (*mulvhi4, *mulvhi4_1): New define_insns.

        * gcc.target/i386/pr66112-2.c: New test.

--- gcc/config/i386/i386.md.jj  2015-05-11 09:08:21.000000000 +0200
+++ gcc/config/i386/i386.md     2015-05-12 11:26:55.642794479 +0200
@@ -6602,14 +6602,14 @@
   [(parallel [(set (reg:CCO FLAGS_REG)
                   (eq:CCO (mult:<DWI>
                              (sign_extend:<DWI>
-                                (match_operand:SWI48 1 "register_operand"))
+                                (match_operand:SWI248 1 "register_operand"))
                              (match_dup 4))
                           (sign_extend:<DWI>
-                             (mult:SWI48 (match_dup 1)
-                                         (match_operand:SWI48 2
-                                            "<general_operand>")))))
-             (set (match_operand:SWI48 0 "register_operand")
-                  (mult:SWI48 (match_dup 1) (match_dup 2)))])
+                             (mult:SWI248 (match_dup 1)
+                                          (match_operand:SWI248 2
+                                             "<general_operand>")))))
+             (set (match_operand:SWI248 0 "register_operand")
+                  (mult:SWI248 (match_dup 1) (match_dup 2)))])
    (set (pc) (if_then_else
               (eq (reg:CCO FLAGS_REG) (const_int 0))
               (label_ref (match_operand 3))
@@ -6665,16 +6665,14 @@
                   (match_operand:<DWI> 3 "const_int_operand" "K,i"))
                (sign_extend:<DWI>
                   (mult:SWI48 (match_dup 1)
-                              (match_operand:SWI 2 "x86_64_immediate_operand"
-                                                   "K,<i>")))))
+                              (match_operand:SWI48 2
+                                 "x86_64_immediate_operand" "K,<i>")))))
    (set (match_operand:SWI48 0 "register_operand" "=r,r")
        (mult:SWI48 (match_dup 1) (match_dup 2)))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))
    && CONST_INT_P (operands[2])
    && INTVAL (operands[2]) == INTVAL (operands[3])"
-  "@
-   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
-   imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+  "imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "imul")
    (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
@@ -6689,26 +6687,78 @@
    (set_attr "bdver1_decode" "direct")
    (set_attr "mode" "<MODE>")
    (set (attr "length_immediate")
-       (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
+       (cond [(eq_attr "alternative" "0")
                  (const_string "1")
               (match_test "<MODE_SIZE> == 8")
                  (const_string "4")]
              (const_string "<MODE_SIZE>")))])
 
+(define_insn "*mulvhi4"
+  [(set (reg:CCO FLAGS_REG)
+       (eq:CCO (mult:SI
+                  (sign_extend:SI
+                     (match_operand:HI 1 "nonimmediate_operand" "0"))
+                  (sign_extend:SI
+                     (match_operand:HI 2 "general_operand" "mr")))
+               (sign_extend:SI
+                  (mult:HI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:HI 0 "register_operand" "=r")
+       (mult:HI (match_dup 1) (match_dup 2)))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "imul{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imul")
+   (set_attr "prefix_0f" "1")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "double")
+   (set_attr "mode" "HI")])
+
+(define_insn "*mulvhi4_1"
+  [(set (reg:CCO FLAGS_REG)
+       (eq:CCO (mult:SI
+                  (sign_extend:SI
+                     (match_operand:HI 1 "nonimmediate_operand" "rm,rm"))
+                  (match_operand:SI 3 "const_int_operand" "K,i"))
+               (sign_extend:SI
+                  (mult:HI (match_dup 1)
+                              (match_operand:HI 2
+                                 "x86_64_immediate_operand" "K,n")))))
+   (set (match_operand:HI 0 "register_operand" "=r,r")
+       (mult:HI (match_dup 1) (match_dup 2)))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))
+   && CONST_INT_P (operands[2])
+   && INTVAL (operands[2]) == INTVAL (operands[3])"
+  "imul{w}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "imul")
+   (set_attr "prefix_0f" "0")
+   (set (attr "athlon_decode")
+       (cond [(eq_attr "cpu" "athlon")
+                 (const_string "vector")
+              (eq_attr "alternative" "1")
+                 (const_string "vector")]
+             (const_string "direct")))
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "double")
+   (set_attr "mode" "HI")
+   (set (attr "length_immediate")
+       (cond [(eq_attr "alternative" "0")
+                 (const_string "1")]
+             (const_string "2")))])
+
 (define_expand "umulv<mode>4"
   [(parallel [(set (reg:CCO FLAGS_REG)
                   (eq:CCO (mult:<DWI>
                              (zero_extend:<DWI>
-                                (match_operand:SWI48 1
+                                (match_operand:SWI248 1
                                                      "nonimmediate_operand"))
                              (zero_extend:<DWI>
-                                (match_operand:SWI48 2
+                                (match_operand:SWI248 2
                                                      "nonimmediate_operand")))
                           (zero_extend:<DWI>
-                             (mult:SWI48 (match_dup 1) (match_dup 2)))))
-             (set (match_operand:SWI48 0 "register_operand")
-                  (mult:SWI48 (match_dup 1) (match_dup 2)))
-             (clobber (match_scratch:SWI48 4))])
+                             (mult:SWI248 (match_dup 1) (match_dup 2)))))
+             (set (match_operand:SWI248 0 "register_operand")
+                  (mult:SWI248 (match_dup 1) (match_dup 2)))
+             (clobber (match_scratch:SWI248 4))])
    (set (pc) (if_then_else
               (eq (reg:CCO FLAGS_REG) (const_int 0))
               (label_ref (match_operand 3))
@@ -6723,14 +6773,14 @@
   [(set (reg:CCO FLAGS_REG)
        (eq:CCO (mult:<DWI>
                   (zero_extend:<DWI>
-                     (match_operand:SWI48 1 "nonimmediate_operand" "%0"))
+                     (match_operand:SWI248 1 "nonimmediate_operand" "%0"))
                   (zero_extend:<DWI>
-                     (match_operand:SWI48 2 "nonimmediate_operand" "rm")))
+                     (match_operand:SWI248 2 "nonimmediate_operand" "rm")))
                (zero_extend:<DWI>
-                  (mult:SWI48 (match_dup 1) (match_dup 2)))))
-   (set (match_operand:SWI48 0 "register_operand" "=a")
-       (mult:SWI48 (match_dup 1) (match_dup 2)))
-   (clobber (match_scratch:SWI48 3 "=d"))]
+                  (mult:SWI248 (match_dup 1) (match_dup 2)))))
+   (set (match_operand:SWI248 0 "register_operand" "=a")
+       (mult:SWI248 (match_dup 1) (match_dup 2)))
+   (clobber (match_scratch:SWI248 3 "=d"))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "mul{<imodesuffix>}\t%2"
   [(set_attr "type" "imul")
--- gcc/testsuite/gcc.target/i386/pr66112-2.c.jj        2015-05-12 10:46:18.565328732 +0200
+++ gcc/testsuite/gcc.target/i386/pr66112-2.c   2015-05-12 10:50:16.203437790 +0200
@@ -0,0 +1,29 @@
+/* PR target/66112 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned short int
+foo (int a, int b)
+{
+  unsigned short int res;
+  a &= 0xffff;
+  b &= 0xffff;
+  if (__builtin_mul_overflow (a, b, &res))
+    res = 0x123;
+  return res;
+}
+
+short int
+bar (int a, int b)
+{
+  short int res;
+  a = (short int) a;
+  b = (short int) b;
+  if (__builtin_mul_overflow (a, b, &res))
+    res = 0x123;
+  return res;
+}
+
+/* { dg-final { scan-assembler-times "jn?o\[ \t\]" 2 } } */
+/* { dg-final { scan-assembler-times "mulw\[ \t\]" 2 } } */
+/* { dg-final { scan-assembler-times "imulw\[ \t\]" 1 } } */

        Jakub

Reply via email to