Hi,
Here is a bit of code from bzip2:
/* Swap two int lvalues in place. Each argument is expanded twice, so pass
   expressions without side effects. */
#define mswap(zz1, zz2) { int zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
/*
 * Reduced loop from bzip2. Walks unLo upward through ptr[] while
 * unLo <= unHi, classifying each entry by the byte block[ptr[unLo]+d]
 * relative to med:
 *   equal   -> swap the entry with ptr[ltLo], then advance ltLo and unLo;
 *   greater -> stop;
 *   less    -> advance unLo only.
 * Returns nothing and takes its integer arguments by value, so the only
 * effect visible to the caller is the reordering of ptr[].
 */
void foo(int unLo, int unHi, int ltLo, int *ptr, char *block, int
med, int d, int n) {
while (1) {
/* Range exhausted. */
if (unLo > unHi) break;
/* n is a parameter reused as scratch: the selected byte (converted to
   int) minus med. The tests of n against zero that follow this
   subtraction are the sequence the message asks about. */
n = ((int)block[ptr[unLo]+d]) - med;
if (n == 0) {
mswap(ptr[unLo], ptr[ltLo]);
ltLo++; unLo++; continue;
};
if (n > 0) break;
unLo++;
}
}
gcc produces the following code:
# gcc output for foo() (32-bit x86, AT&T syntax). Matching the loads against
# the C source: 8(%ebp)=unLo, 12(%ebp)=unHi, 16(%ebp)=ltLo, 20(%ebp)=ptr,
# 24(%ebp)=block, 28(%ebp)=med, 32(%ebp)=d. %edx holds unLo throughout;
# -16(%ebp) holds &ptr[ltLo]; -20(%ebp) holds the value read from ptr[unLo].
.text
.globl _foo
_foo:
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
subl $12, %esp
# Initial loop test: leave immediately if unLo > unHi.
movl 8(%ebp), %edx
cmpl 12(%ebp), %edx
jg L10
# Compute &ptr[ltLo] once and keep it in a stack slot.
movl 16(%ebp), %eax
movl 20(%ebp), %ecx
leal (%ecx,%eax,4), %eax
movl %eax, -16(%ebp)
jmp L4
# n == 0 path: ptr[unLo] = ptr[ltLo]; ptr[ltLo] = saved old ptr[unLo];
# then step the &ptr[ltLo] slot by one int and increment unLo.
L12:
movl -16(%ebp), %ecx
movl (%ecx), %eax
movl %eax, (%esi)
movl -20(%ebp), %edi
movl %edi, (%ecx)
addl $4, %ecx
movl %ecx, -16(%ebp)
addl $1, %edx
# Exit if unHi < unLo, otherwise fall through into the next iteration.
cmpl %edx, 12(%ebp)
jl L10
# Loop head when coming from a previous iteration: reload ptr.
L13:
movl 20(%ebp), %ecx
# Loop body: %esi = &ptr[unLo]; the loaded ptr[unLo] is saved for a
# possible swap, then %eax = sign-extended block[ptr[unLo]+d] - med (n).
L4:
leal (%ecx,%edx,4), %esi
movl (%esi), %edi
movl %edi, -20(%ebp)
movl 24(%ebp), %eax
addl %edi, %eax
movl 32(%ebp), %edi
movsbl (%eax,%edi),%eax
subl 28(%ebp), %eax
cmpl $0, %eax <---- extra compare...
# n == 0 -> swap path; n > 0 -> exit; otherwise n < 0.
je L12
jg L10
# n < 0 path: unLo++ and continue while unHi >= unLo.
addl $1, %edx
cmpl %edx, 12(%ebp)
jge L13
# Common exit: release the locals and restore the saved registers.
L10:
addl $12, %esp
popl %esi
popl %edi
popl %ebp
ret
The cmpl is not needed because subl has already set the flags.
My question is: where and how would you suggest we do this
optimization? With peephole2? Or in combine? In i386.md, I see that
the pattern *subsi_2 looks like what I'd like to combine these two
insns into:
;; Quoted from i386.md. One insn with two parallel effects: the flags
;; register is set from comparing (operand 1 - operand 2) against zero, and
;; operand 0 receives that same difference. The insn condition requires
;; ix86_match_ccmode to accept CCGOCmode for the insn and
;; ix86_binary_operator_ok to accept the operands. It is output as a single
;; sub{l} instruction.
(define_insn "*subsi_2"
[(set (reg FLAGS_REG)
(compare
(minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
(match_operand:SI 2 "general_operand" "ri,rm"))
(const_int 0)))
(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
(minus:SI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
&& ix86_binary_operator_ok (MINUS, SImode, operands)"
"sub{l}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "mode" "SI")])
But I do not see a peephole2 that would generate this insn. Does
anyone know how this pattern is used?
Suggestions are appreciated!
Thanks,
Evan Cheng
Apple Computers, Inc.