Hi,

Here is a bit of code from bzip2:

/* Swap two int lvalues zz1 and zz2 through a block-scoped temporary.
   Each argument is expanded twice, so arguments must be free of side effects. */
#define mswap(zz1, zz2) { int zztmp = zz1; zz1 = zz2; zz2 = zztmp; }

/*
 * Reduced test case: scan ptr[unLo..unHi] from the low end.
 *
 * For each index, n = (int)block[ptr[unLo] + d] - med is computed and tested:
 *   n == 0 : swap ptr[unLo] with ptr[ltLo], advance both ltLo and unLo
 *   n >  0 : stop scanning
 *   n <  0 : advance unLo only
 * The loop also stops once unLo passes unHi.
 *
 * n is passed as a parameter but is overwritten before it is ever read.
 * The point of interest is that n is produced by a subtraction and then
 * immediately compared against zero twice (== 0, > 0).
 */
void foo(int unLo, int unHi, int ltLo, int *ptr, char *block, int med, int d, int n) {

  while (1) {
    if (unLo > unHi) break;               /* range exhausted */
    n = ((int)block[ptr[unLo]+d]) - med;  /* subtraction whose result is tested against 0 */
    if (n == 0) {
      mswap(ptr[unLo], ptr[ltLo]);
      ltLo++; unLo++; continue;
    };
    if (n >  0) break;
    unLo++;                               /* n < 0: skip this element */
  }
}

gcc produces the following code:

        .text
.globl _foo
# _foo: compiler output for the C function foo() shown above (i386, AT&T syntax).
# Arguments are read from the stack relative to %ebp, in C parameter order:
#    8(%ebp) unLo   12(%ebp) unHi   16(%ebp) ltLo   20(%ebp) ptr
#   24(%ebp) block  28(%ebp) med    32(%ebp) d
# Register / stack-slot roles inside the loop:
#   %edx       = unLo
#   %esi       = &ptr[unLo]
#   -16(%ebp)  = &ptr[ltLo]  (advanced by 4 instead of re-indexing)
#   -20(%ebp)  = value of ptr[unLo] loaded at L4
_foo:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        subl    $12, %esp               # room for the two spill slots used below
        movl    8(%ebp), %edx           # edx = unLo
        cmpl    12(%ebp), %edx
        jg      L10                     # unLo > unHi on entry: nothing to do
        movl    16(%ebp), %eax          # eax = ltLo
        movl    20(%ebp), %ecx          # ecx = ptr
        leal    (%ecx,%eax,4), %eax     # eax = &ptr[ltLo]
        movl    %eax, -16(%ebp)
        jmp     L4
L12:                                    # n == 0: swap ptr[unLo] and ptr[ltLo]
        movl    -16(%ebp), %ecx         # ecx = &ptr[ltLo]
        movl    (%ecx), %eax
        movl    %eax, (%esi)            # ptr[unLo] = ptr[ltLo]
        movl    -20(%ebp), %edi         # edi = old ptr[unLo]
        movl    %edi, (%ecx)            # ptr[ltLo] = old ptr[unLo]
        addl    $4, %ecx
        movl    %ecx, -16(%ebp)         # ltLo++ (as a pointer bump)
        addl    $1, %edx                # unLo++
        cmpl    %edx, 12(%ebp)
        jl      L10                     # unHi < unLo: done
L13:
        movl    20(%ebp), %ecx          # reload ptr
L4:
        leal    (%ecx,%edx,4), %esi     # esi = &ptr[unLo]
        movl    (%esi), %edi            # edi = ptr[unLo]
        movl    %edi, -20(%ebp)         # keep it for the swap at L12
        movl    24(%ebp), %eax          # eax = block
        addl    %edi, %eax              # eax = block + ptr[unLo]
        movl    32(%ebp), %edi          # edi = d
        movsbl  (%eax,%edi),%eax        # eax = sign-extended block[ptr[unLo]+d]
        subl    28(%ebp), %eax          # eax = n = ... - med
        cmpl    $0, %eax   <---- extra compare...
        je      L12                     # n == 0
        jg      L10                     # n > 0: break
        addl    $1, %edx                # n < 0: unLo++
        cmpl    %edx, 12(%ebp)
        jge     L13                     # unHi >= unLo: keep scanning
L10:
        addl    $12, %esp
        popl    %esi
        popl    %edi
        popl    %ebp
        ret

The "cmpl $0, %eax" is not needed because the preceding subl has already set the flags that the following je and jg test.

My question is: where and how would you suggest we do this optimization? With peephole2, or in combine? In i386.md, the pattern *subsi_2 looks like what I'd like to combine these two insns into:

;; *subsi_2: one SImode subtract that both sets the flags register and
;; stores the difference.
;;   - first set:  FLAGS_REG = compare ((operand 1 - operand 2), 0)
;;   - second set: operand 0 = operand 1 - operand 2  (same operands, via match_dup)
;; Operand 1 is tied to operand 0 by the "0" constraint, and the two
;; alternatives are mem/reg destination with reg/imm source ("=rm" / "ri")
;; and reg destination with reg/mem source ("=r" / "rm").
;; The insn is only valid when ix86_match_ccmode accepts CCGOCmode for it
;; and ix86_binary_operator_ok accepts the operands for MINUS.
;; NOTE(review): CCGOCmode is presumably the flags mode for compare-with-zero
;; users that do not rely on the overflow/carry flags -- confirm against
;; i386-modes.def.
;; Emitted as a single "sub{l}" instruction.
(define_insn "*subsi_2"
  [(set (reg FLAGS_REG)
    (compare
      (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
            (match_operand:SI 2 "general_operand" "ri,rm"))
      (const_int 0)))
   (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
    (minus:SI (match_dup 1) (match_dup 2)))]
  "ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (MINUS, SImode, operands)"
  "sub{l}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "SI")])

But I do not see a peephole2 that would generate this insn. Does anyone know how this pattern is used?

Suggestions are appreciated!

Thanks,

Evan Cheng
Apple Computers, Inc.

Reply via email to