This patch is for google branches only. This is the patch I plan to apply after reverting an earlier set of patches ported over from trunk that addressed the same problem but were causing some performance regressions. This is a narrower fix I had originally prepared before finding the trunk patches.
This addresses only the specific problem of the and*i_1 patterns preferring to match an earlier set of patterns "r,0,Z" when the source and dest registers are the same, thus preferring to keep the instruction as an "andw" instead of using a zero-extending move when the constant is 0xff. We now will prefer to convert to a zero-extending move when the constant is 0xff and the machine has LCP stalls. Google ref b/6615073. Tested with crosstool. Ok for google-4_7? 2012-08-23 Teresa Johnson <tejohn...@google.com> * config/i386/i386.md (anddi_1): Add new r,qm,Lh pattern first to catch an and with 0xff that would be turned into an andw on machines with length-changing prefix stalls, and instead convert it into a zero-extending move. (andsi_1, andhi_1): Ditto. * config/i386/constraints.md (La): Rename from "L". (Lh): New constraint to match possible LCP stalling ands. * config/i386/i386.c (ix86_binary_operator_ok): Update for rename of "L" constraint. Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 188251) +++ config/i386/i386.md (working copy) @@ -7697,10 +7697,10 @@ "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;") (define_insn "*anddi_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r") (and:DI - (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") - (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) + (match_operand:DI 1 "nonimmediate_operand" "%qm,0,0,0,qm") + (match_operand:DI 2 "x86_64_szext_general_operand" "Lh,Z,re,rm,La"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" { @@ -7738,8 +7738,8 @@ return "and{q}\t{%2, %0|%0, %2}"; } } - [(set_attr "type" "alu,alu,alu,imovx") - (set_attr "length_immediate" "*,*,*,0") + [(set_attr "type" "imovx,alu,alu,alu,imovx") + (set_attr "length_immediate" "0,*,*,*,0") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") @@ -7747,12 +7747,12 @@ (match_operand 1 "ext_QIreg_operand" ""))) (const_string "1") (const_string "*"))) - (set_attr "mode" "SI,DI,DI,SI")]) + (set_attr "mode" "SI,SI,DI,DI,SI")]) (define_insn "*andsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r") - (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") - (match_operand:SI 2 "x86_64_general_operand" "re,rm,L"))) + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%qm,0,0,qm") + (match_operand:SI 2 "x86_64_general_operand" "Lh,re,rm,La"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, SImode, operands)" { @@ -7783,7 +7783,7 @@ return "and{l}\t{%2, %0|%0, %2}"; } } - [(set_attr "type" "alu,alu,imovx") + [(set_attr "type" "imovx,alu,alu,imovx") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") @@ -7791,7 +7791,7 @@ (match_operand 1 "ext_QIreg_operand" ""))) (const_string "1") (const_string "*"))) - (set_attr "length_immediate" "*,*,0") + (set_attr "length_immediate" "0,*,*,0") (set_attr "mode" "SI")]) ;; See comment for addsi_1_zext why we do use nonimmediate_operand @@ -7807,9 +7807,9 @@ (set_attr "mode" "SI")]) (define_insn "*andhi_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") - (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm") - (match_operand:HI 2 "general_operand" "rn,rm,L"))) + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "%qm,0,0,qm") + (match_operand:HI 2 "general_operand" "Lh,rn,rm,L"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, HImode, operands)" { @@ -7826,15 +7826,15 @@ return "and{w}\t{%2, %0|%0, %2}"; } } - [(set_attr "type" "alu,alu,imovx") - (set_attr "length_immediate" "*,*,0") + [(set_attr "type" "imovx,alu,alu,imovx") + (set_attr "length_immediate" "0,*,*,0") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") (match_operand 1 "ext_QIreg_operand" "")) (const_string "1") (const_string "*"))) - (set_attr "mode" "HI,HI,SI")]) + (set_attr "mode" "SI,HI,HI,SI")]) ;; %%% Potential partial reg stall on alternative 2. What to do? (define_insn "*andqi_1" Index: config/i386/constraints.md =================================================================== --- config/i386/constraints.md (revision 184516) +++ config/i386/constraints.md (working copy) @@ -148,13 +148,19 @@ (and (match_code "const_int") (match_test "IN_RANGE (ival, -128, 127)"))) -(define_constraint "L" +(define_constraint "La" "@code{0xFF}, @code{0xFFFF} or @code{0xFFFFFFFF} for AND as a zero-extending move." (and (match_code "const_int") (match_test "ival == 0xff || ival == 0xffff || ival == (HOST_WIDE_INT) 0xffffffff"))) +(define_constraint "Lh" + "@code{0xFF} for AND as a zero-extending move when there are LCP stalls." + (and (match_code "const_int") + (match_test "ival == 0xff") + (match_test "TARGET_LCP_STALL"))) + (define_constraint "M" "0, 1, 2, or 3 (shifts for the @code{lea} instruction)." (and (match_code "const_int") Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 188251) +++ config/i386/i386.c (working copy) @@ -16331,7 +16331,7 @@ ix86_binary_operator_ok (enum rtx_code code, enum && (mode == HImode || mode == SImode || (TARGET_64BIT && mode == DImode)) - && satisfies_constraint_L (src2)); + && satisfies_constraint_La (src2)); return true; } -- Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413