On Friday 14 October 2005 01:41, Evan Cheng wrote:

#(insn:TI 126 125 40 (parallel [
#            (set (reg:SI 0 ax [71])
#                (minus:SI (reg:SI 0 ax [71])
#                    (reg:SI 5 di)))
#            (clobber (reg:CC 17 flags))
#        ]) 242 {*subsi_1} (insn_list:REG_DEP_TRUE 125 (insn_list:REG_DEP_TRUE 37 (nil)))
#    (expr_list:REG_DEAD (reg:SI 5 di)
#        (expr_list:REG_UNUSED (reg:CC 17 flags)
#            (nil))))
        subl    %edi, %eax      # 126   *subsi_1/1      [length = 2]
#(insn:TI 40 126 41 (set (reg:CC 17 flags)
#        (compare:CC (reg:SI 0 ax [71])
#            (const_int 0 [0x0]))) 5 {*cmpsi_1_insn} (insn_list:REG_DEP_TRUE 126 (nil))
#    (expr_list:REG_DEAD (reg:SI 0 ax [71])
#        (nil)))
        cmpl    $0, %eax        # 40    *cmpsi_1_insn/1 [length = 3]


>
>          .text
> .globl _foo
> _foo:
>          pushl   %ebp
>          movl    %esp, %ebp
>          pushl   %edi
>          pushl   %esi
>          subl    $12, %esp
>          movl    8(%ebp), %edx
>          cmpl    12(%ebp), %edx
>          jg      L10
>          movl    16(%ebp), %eax
>          movl    20(%ebp), %ecx
>          leal    (%ecx,%eax,4), %eax
>          movl    %eax, -16(%ebp)
>          jmp     L4
> L12:
>          movl    -16(%ebp), %ecx
>          movl    (%ecx), %eax
>          movl    %eax, (%esi)
>          movl    -20(%ebp), %edi
>          movl    %edi, (%ecx)
>          addl    $4, %ecx
>          movl    %ecx, -16(%ebp)
>          addl    $1, %edx
>          cmpl    %edx, 12(%ebp)
>          jl      L10
> L13:
>          movl    20(%ebp), %ecx
> L4:
>          leal    (%ecx,%edx,4), %esi
>          movl    (%esi), %edi
>          movl    %edi, -20(%ebp)
>          movl    24(%ebp), %eax
>          addl    %edi, %eax
>          movl    32(%ebp), %edi
>          movsbl  (%eax,%edi),%eax
>          subl    28(%ebp), %eax
>          cmpl    $0, %eax   <---- extra compare...
>          je      L12
>          jg      L10
>          addl    $1, %edx
>          cmpl    %edx, 12(%ebp)
>          jge     L13
> L10:
>          addl    $12, %esp
>          popl    %esi
>          popl    %edi
>          popl    %ebp
>          ret
>
> The cmpl is not needed because subl has already set the flags.
>
> My question is: where and how would you suggest we do this
> optimization? With peephole2, or in combine? In i386.md, I see that
> the pattern *subsi_2 looks like what I'd like to combine these two
> insns into:
>
> (define_insn "*subsi_2"
>    [(set (reg FLAGS_REG)
>      (compare
>        (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
>              (match_operand:SI 2 "general_operand" "ri,rm"))
>        (const_int 0)))
>     (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
>      (minus:SI (match_dup 1) (match_dup 2)))]
>    "ix86_match_ccmode (insn, CCGOCmode)
>     && ix86_binary_operator_ok (MINUS, SImode, operands)"
>    "sub{l}\t{%2, %0|%0, %2}"
>    [(set_attr "type" "alu")
>     (set_attr "mode" "SI")])
>
> But I do not see a peephole2 that would generate this insn. Does
> anyone know how this pattern is used?
>
> Suggestions are appreciated!
>
> Thanks,
>
> Evan Cheng
> Apple Computers, Inc.

Reply via email to