On Tue, May 12, 2015 at 7:43 PM, Jakub Jelinek <ja...@redhat.com> wrote: > Hi! > > This patch improves expansion of __builtin_mul_overflow for HImode, both > signed and unsigned, on x86_64/i686. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2015-05-12 Jakub Jelinek <ja...@redhat.com> > > PR target/66112 > * config/i386/i386.md (mulv<mode>4, umulv<mode>4, *umulv<mode>4): > Use SWI248 iterator instead of SWI.
These names, along with the other *v<mode>4 patterns, need documentation in md.texi. Ramana > (*mulv<mode>4_1): Use SWI48 instead of SWI. Simplify output template. > Use eq_attr "alternative" "0" instead of match_test in > length_immediate attribute computation. > (*mulvhi4, *mulvhi4_1): New define_insns. > > * gcc.target/i386/pr66112-2.c: New test. > > --- gcc/config/i386/i386.md.jj 2015-05-11 09:08:21.000000000 +0200 > +++ gcc/config/i386/i386.md 2015-05-12 11:26:55.642794479 +0200 > @@ -6602,14 +6602,14 @@ > [(parallel [(set (reg:CCO FLAGS_REG) > (eq:CCO (mult:<DWI> > (sign_extend:<DWI> > - (match_operand:SWI48 1 "register_operand")) > + (match_operand:SWI248 1 "register_operand")) > (match_dup 4)) > (sign_extend:<DWI> > - (mult:SWI48 (match_dup 1) > - (match_operand:SWI48 2 > - "<general_operand>"))))) > - (set (match_operand:SWI48 0 "register_operand") > - (mult:SWI48 (match_dup 1) (match_dup 2)))]) > + (mult:SWI248 (match_dup 1) > + (match_operand:SWI248 2 > + "<general_operand>"))))) > + (set (match_operand:SWI248 0 "register_operand") > + (mult:SWI248 (match_dup 1) (match_dup 2)))]) > (set (pc) (if_then_else > (eq (reg:CCO FLAGS_REG) (const_int 0)) > (label_ref (match_operand 3)) > @@ -6665,16 +6665,14 @@ > (match_operand:<DWI> 3 "const_int_operand" "K,i")) > (sign_extend:<DWI> > (mult:SWI48 (match_dup 1) > - (match_operand:SWI 2 "x86_64_immediate_operand" > - "K,<i>"))))) > + (match_operand:SWI48 2 > + "x86_64_immediate_operand" "K,<i>"))))) > (set (match_operand:SWI48 0 "register_operand" "=r,r") > (mult:SWI48 (match_dup 1) (match_dup 2)))] > "!(MEM_P (operands[1]) && MEM_P (operands[2])) > && CONST_INT_P (operands[2]) > && INTVAL (operands[2]) == INTVAL (operands[3])" > - "@ > - imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} > - imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" > + "imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "type" "imul") > (set (attr "athlon_decode") > (cond [(eq_attr "cpu" "athlon") > @@ -6689,26 +6687,78 @@ > (set_attr 
"bdver1_decode" "direct") > (set_attr "mode" "<MODE>") > (set (attr "length_immediate") > - (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") > + (cond [(eq_attr "alternative" "0") > (const_string "1") > (match_test "<MODE_SIZE> == 8") > (const_string "4")] > (const_string "<MODE_SIZE>")))]) > > +(define_insn "*mulvhi4" > + [(set (reg:CCO FLAGS_REG) > + (eq:CCO (mult:SI > + (sign_extend:SI > + (match_operand:HI 1 "nonimmediate_operand" "0")) > + (sign_extend:SI > + (match_operand:HI 2 "general_operand" "mr"))) > + (sign_extend:SI > + (mult:HI (match_dup 1) (match_dup 2))))) > + (set (match_operand:HI 0 "register_operand" "=r") > + (mult:HI (match_dup 1) (match_dup 2)))] > + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" > + "imul{w}\t{%2, %0|%0, %2}" > + [(set_attr "type" "imul") > + (set_attr "prefix_0f" "1") > + (set_attr "athlon_decode" "vector") > + (set_attr "amdfam10_decode" "direct") > + (set_attr "bdver1_decode" "double") > + (set_attr "mode" "HI")]) > + > +(define_insn "*mulvhi4_1" > + [(set (reg:CCO FLAGS_REG) > + (eq:CCO (mult:SI > + (sign_extend:SI > + (match_operand:HI 1 "nonimmediate_operand" "rm,rm")) > + (match_operand:SI 3 "const_int_operand" "K,i")) > + (sign_extend:SI > + (mult:HI (match_dup 1) > + (match_operand:HI 2 > + "x86_64_immediate_operand" "K,n"))))) > + (set (match_operand:HI 0 "register_operand" "=r,r") > + (mult:HI (match_dup 1) (match_dup 2)))] > + "!(MEM_P (operands[1]) && MEM_P (operands[2])) > + && CONST_INT_P (operands[2]) > + && INTVAL (operands[2]) == INTVAL (operands[3])" > + "imul{w}\t{%2, %1, %0|%0, %1, %2}" > + [(set_attr "type" "imul") > + (set_attr "prefix_0f" "0") > + (set (attr "athlon_decode") > + (cond [(eq_attr "cpu" "athlon") > + (const_string "vector") > + (eq_attr "alternative" "1") > + (const_string "vector")] > + (const_string "direct"))) > + (set_attr "amdfam10_decode" "vector") > + (set_attr "bdver1_decode" "double") > + (set_attr "mode" "HI") > + (set (attr "length_immediate") > + (cond 
[(eq_attr "alternative" "0") > + (const_string "1")] > + (const_string "2")))]) > + > (define_expand "umulv<mode>4" > [(parallel [(set (reg:CCO FLAGS_REG) > (eq:CCO (mult:<DWI> > (zero_extend:<DWI> > - (match_operand:SWI48 1 > + (match_operand:SWI248 1 > "nonimmediate_operand")) > (zero_extend:<DWI> > - (match_operand:SWI48 2 > + (match_operand:SWI248 2 > > "nonimmediate_operand"))) > (zero_extend:<DWI> > - (mult:SWI48 (match_dup 1) (match_dup 2))))) > - (set (match_operand:SWI48 0 "register_operand") > - (mult:SWI48 (match_dup 1) (match_dup 2))) > - (clobber (match_scratch:SWI48 4))]) > + (mult:SWI248 (match_dup 1) (match_dup 2))))) > + (set (match_operand:SWI248 0 "register_operand") > + (mult:SWI248 (match_dup 1) (match_dup 2))) > + (clobber (match_scratch:SWI248 4))]) > (set (pc) (if_then_else > (eq (reg:CCO FLAGS_REG) (const_int 0)) > (label_ref (match_operand 3)) > @@ -6723,14 +6773,14 @@ > [(set (reg:CCO FLAGS_REG) > (eq:CCO (mult:<DWI> > (zero_extend:<DWI> > - (match_operand:SWI48 1 "nonimmediate_operand" "%0")) > + (match_operand:SWI248 1 "nonimmediate_operand" "%0")) > (zero_extend:<DWI> > - (match_operand:SWI48 2 "nonimmediate_operand" "rm"))) > + (match_operand:SWI248 2 "nonimmediate_operand" "rm"))) > (zero_extend:<DWI> > - (mult:SWI48 (match_dup 1) (match_dup 2))))) > - (set (match_operand:SWI48 0 "register_operand" "=a") > - (mult:SWI48 (match_dup 1) (match_dup 2))) > - (clobber (match_scratch:SWI48 3 "=d"))] > + (mult:SWI248 (match_dup 1) (match_dup 2))))) > + (set (match_operand:SWI248 0 "register_operand" "=a") > + (mult:SWI248 (match_dup 1) (match_dup 2))) > + (clobber (match_scratch:SWI248 3 "=d"))] > "!(MEM_P (operands[1]) && MEM_P (operands[2]))" > "mul{<imodesuffix>}\t%2" > [(set_attr "type" "imul") > --- gcc/testsuite/gcc.target/i386/pr66112-2.c.jj 2015-05-12 > 10:46:18.565328732 +0200 > +++ gcc/testsuite/gcc.target/i386/pr66112-2.c 2015-05-12 10:50:16.203437790 > +0200 > @@ -0,0 +1,29 @@ > +/* PR target/66112 */ > +/* { dg-do compile } */ > 
+/* { dg-options "-O2" } */ > + > +unsigned short int > +foo (int a, int b) > +{ > + unsigned short int res; > + a &= 0xffff; > + b &= 0xffff; > + if (__builtin_mul_overflow (a, b, &res)) > + res = 0x123; > + return res; > +} > + > +short int > +bar (int a, int b) > +{ > + short int res; > + a = (short int) a; > + b = (short int) b; > + if (__builtin_mul_overflow (a, b, &res)) > + res = 0x123; > + return res; > +} > + > +/* { dg-final { scan-assembler-times "jn?o\[ \t\]" 2 } } */ > +/* { dg-final { scan-assembler-times "mulw\[ \t\]" 2 } } */ > +/* { dg-final { scan-assembler-times "imulw\[ \t\]" 1 } } */ > > Jakub