Ok for now but some follow up may be needed.

thanks,

David

On Thu, Aug 23, 2012 at 4:21 PM, Teresa Johnson <tejohn...@google.com> wrote:
> On Thu, Aug 23, 2012 at 3:33 PM, Xinliang David Li <davi...@google.com> wrote:
>> On Thu, Aug 23, 2012 at 2:01 PM, Teresa Johnson <tejohn...@google.com> wrote:
>>> This patch is for google branches only.
>>>
>>> This is the patch I plan to apply after reverting an earlier set of
>>> patches ported over from trunk that addressed the same problem but
>>> were causing some performance regressions. This is a narrower fix
>>> I had originally prepared before finding the trunk patches.
>>>
>>> This addresses only the specific problem of the and*i_1 patterns preferring
>>> to match an earlier set of patterns "r,0,Z" when the source and dest 
>>> registers
>>> are the same, thus preferring to keep the instruction as an "andw" instead
>>> of using a zero-extending move when the constant is 0xff. We now will
>>> prefer to convert to a zero-extending move when the constant is 0xff and
>>> the machine has LCP stalls.
>>>
>>> Google ref b/6615073.
>>>
>>> Tested with crosstool. Ok for google-4_7?
>>>
>>> 2012-08-23  Teresa Johnson  <tejohn...@google.com>
>>>
>>>         * config/i386/i386.md (anddi_1): Add new r,qm,Lh pattern
>>>         first to catch an and with 0xff that would be turned into
>>>         an andw on machines with length-changing prefix stalls,
>>>         and instead convert it into a zero-extending move.
>>>         (andsi_1, andhi_1): Ditto.
>>>         * config/i386/constraints.md (La): Rename from "L".
>>>         (Lh): New constraint to match possible LCP stalling ands.
>>>         * config/i386/i386.c (ix86_binary_operator_ok): Update
>>>         for rename of "L" constraint.
>>>
>>> Index: config/i386/i386.md
>>> ===================================================================
>>> --- config/i386/i386.md (revision 188251)
>>> +++ config/i386/i386.md (working copy)
>>> @@ -7697,10 +7697,10 @@
>>>    "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;")
>>>
>>>  (define_insn "*anddi_1"
>>> -  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
>>> +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r")
>>>         (and:DI
>>> -        (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
>>> -        (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L")))
>>> +        (match_operand:DI 1 "nonimmediate_operand" "%qm,0,0,0,qm")
>>> +        (match_operand:DI 2 "x86_64_szext_general_operand" 
>>> "Lh,Z,re,rm,La")))
>>>     (clobber (reg:CC FLAGS_REG))]
>>>    "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
>>>  {
>>> @@ -7738,8 +7738,8 @@
>>>         return "and{q}\t{%2, %0|%0, %2}";
>>>      }
>>>  }
>>
>> Why is there a need to split L into Lh and La? Removing redundant
>> 'and' seems beneficial.
>
> I first did move the L condition first, which did fix one benchmark's
> performance issue but I got better performance overall by splitting it
> (so an "and" with 0xffff or 0xffffffffff might stay an and rather than
> becoming a movz variant). I'm not sure why using an "and" instead of
> converting it to a movz in these non-LCP situations matters though.
>
>>
>>
>>> -  [(set_attr "type" "alu,alu,alu,imovx")
>>> -   (set_attr "length_immediate" "*,*,*,0")
>>> +  [(set_attr "type" "imovx,alu,alu,alu,imovx")
>>> +   (set_attr "length_immediate" "0,*,*,*,0")
>>>     (set (attr "prefix_rex")
>>>       (if_then_else
>>>         (and (eq_attr "type" "imovx")
>>> @@ -7747,12 +7747,12 @@
>>>                  (match_operand 1 "ext_QIreg_operand" "")))
>>>         (const_string "1")
>>>         (const_string "*")))
>>> -   (set_attr "mode" "SI,DI,DI,SI")])
>>> +   (set_attr "mode" "SI,SI,DI,DI,SI")])
>>>
>>>  (define_insn "*andsi_1"
>>> -  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r")
>>> -       (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
>>> -               (match_operand:SI 2 "x86_64_general_operand" "re,rm,L")))
>>> +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r")
>>> +       (and:SI (match_operand:SI 1 "nonimmediate_operand" "%qm,0,0,qm")
>>> +               (match_operand:SI 2 "x86_64_general_operand" 
>>> "Lh,re,rm,La")))
>>>     (clobber (reg:CC FLAGS_REG))]
>>>    "ix86_binary_operator_ok (AND, SImode, operands)"
>>>  {
>>> @@ -7783,7 +7783,7 @@
>>>        return "and{l}\t{%2, %0|%0, %2}";
>>>      }
>>>  }
>>> -  [(set_attr "type" "alu,alu,imovx")
>>> +  [(set_attr "type" "imovx,alu,alu,imovx")
>>>     (set (attr "prefix_rex")
>>>       (if_then_else
>>>         (and (eq_attr "type" "imovx")
>>> @@ -7791,7 +7791,7 @@
>>>                  (match_operand 1 "ext_QIreg_operand" "")))
>>>         (const_string "1")
>>>         (const_string "*")))
>>> -   (set_attr "length_immediate" "*,*,0")
>>> +   (set_attr "length_immediate" "0,*,*,0")
>>>     (set_attr "mode" "SI")])
>>>
>>>  ;; See comment for addsi_1_zext why we do use nonimmediate_operand
>>> @@ -7807,9 +7807,9 @@
>>>     (set_attr "mode" "SI")])
>>>
>>>  (define_insn "*andhi_1"
>>> -  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
>>> -       (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
>>> -               (match_operand:HI 2 "general_operand" "rn,rm,L")))
>>> +  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r")
>>> +       (and:HI (match_operand:HI 1 "nonimmediate_operand" "%qm,0,0,qm")
>>> +               (match_operand:HI 2 "general_operand" "Lh,rn,rm,L")))
>>>     (clobber (reg:CC FLAGS_REG))]
>>>    "ix86_binary_operator_ok (AND, HImode, operands)"
>>>  {
>>> @@ -7826,15 +7826,15 @@
>>>        return "and{w}\t{%2, %0|%0, %2}";
>>>      }
>>>  }
>>> -  [(set_attr "type" "alu,alu,imovx")
>>> -   (set_attr "length_immediate" "*,*,0")
>>> +  [(set_attr "type" "imovx,alu,alu,imovx")
>>> +   (set_attr "length_immediate" "0,*,*,0")
>>>     (set (attr "prefix_rex")
>>>       (if_then_else
>>>         (and (eq_attr "type" "imovx")
>>>             (match_operand 1 "ext_QIreg_operand" ""))
>>>         (const_string "1")
>>>         (const_string "*")))
>>> -   (set_attr "mode" "HI,HI,SI")])
>>> +   (set_attr "mode" "SI,HI,HI,SI")])
>>>
>>>  ;; %%% Potential partial reg stall on alternative 2.  What to do?
>>>  (define_insn "*andqi_1"
>>> Index: config/i386/constraints.md
>>> ===================================================================
>>> --- config/i386/constraints.md  (revision 184516)
>>> +++ config/i386/constraints.md  (working copy)
>>> @@ -148,13 +148,19 @@
>>>    (and (match_code "const_int")
>>>         (match_test "IN_RANGE (ival, -128, 127)")))
>>>
>>> -(define_constraint "L"
>>> +(define_constraint "La"
>>>    "@code{0xFF}, @code{0xFFFF} or @code{0xFFFFFFFF}
>>>     for AND as a zero-extending move."
>>>    (and (match_code "const_int")
>>>         (match_test "ival == 0xff || ival == 0xffff
>>>                     || ival == (HOST_WIDE_INT) 0xffffffff")))
>>>
>>> +(define_constraint "Lh"
>>> +  "@code{0xFF} for AND as a zero-extending move when there are LCP stalls."
>>> +  (and (match_code "const_int")
>>> +       (match_test "ival == 0xff")
>>> +       (match_test "TARGET_LCP_STALL")))
>>
>>
>> This is necessary but not sufficient to generate LCP. For instance,
>> not in adddi and addsi instructions.
>
> I saw cases where andsi or anddi with 0xff was converted to an andw in
> the end (possibly after some combining?).
>
> Teresa
>
>>
>>
>> thanks,
>>
>> David
>>
>>> +
>>>  (define_constraint "M"
>>>    "0, 1, 2, or 3 (shifts for the @code{lea} instruction)."
>>>    (and (match_code "const_int")
>>> Index: config/i386/i386.c
>>> ===================================================================
>>> --- config/i386/i386.c  (revision 188251)
>>> +++ config/i386/i386.c  (working copy)
>>> @@ -16331,7 +16331,7 @@ ix86_binary_operator_ok (enum rtx_code code, enum
>>>             && (mode == HImode
>>>                 || mode == SImode
>>>                 || (TARGET_64BIT && mode == DImode))
>>> -           && satisfies_constraint_L (src2));
>>> +           && satisfies_constraint_La (src2));
>>>
>>>    return true;
>>>  }
>>>
>>>
>>> --
>>> Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413
>
>
>
> --
> Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413

Reply via email to