Before 1089d083117 Simplify (B * v + C) * D -> BD* v + CD when B,C,D are all INTEGER_CST.
the loop was

.L2:
	movl	(%rdi,%rdx), %eax
	addl	$12345, %eax
	imull	$-1564285888, %eax, %eax
	leal	-333519936(%rax), %eax
	movl	%eax, (%rsi,%rdx)
	addq	$4, %rdx
	cmpq	$1024, %rdx
	jne	.L2

There was 1 addl and 1 leal. The 1 addq was to update the loop counter. The optimized loop is

.L2:
	imull	$-1564285888, (%rdi,%rax), %edx
	subl	$1269844480, %edx
	movl	%edx, (%rsi,%rax)
	addq	$4, %rax
	cmpq	$1024, %rax
	jne	.L2

1 addl is changed to subl and leal is removed. Adjust assembly scan to check for 1 subl and 1 addl/addq as well as lea removal.

	* gcc.target/i386/pr53533-1.c: Adjust assembly scan.
	* gcc.target/i386/pr53533-3.c: Likewise.

-- 
H.J.
From 2e3984e83900772710c5a652f1f7ad0e9a46e489 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Sat, 12 Oct 2024 05:53:49 +0800 Subject: [PATCH] gcc.target/i386/pr53533-[13].c: Adjust assembly scan Before 1089d083117 Simplify (B * v + C) * D -> BD* v + CD when B,C,D are all INTEGER_CST. the loop was .L2: movl (%rdi,%rdx), %eax addl $12345, %eax imull $-1564285888, %eax, %eax leal -333519936(%rax), %eax movl %eax, (%rsi,%rdx) addq $4, %rdx cmpq $1024, %rdx jne .L2 There were 1 addl and 1 leal. 1 addq was to update the loop counter. The optimized loop is .L2: imull $-1564285888, (%rdi,%rax), %edx subl $1269844480, %edx movl %edx, (%rsi,%rax) addq $4, %rax cmpq $1024, %rax jne .L2 1 addl is changed to subl and leal is removed. Adjust assembly scan to check for 1 subl and 1 addl/addq as well as lea removal. * gcc.target/i386/pr53533-1.c: Adjust assembly scan. * gcc.target/i386/pr53533-3.c: Likewise. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> --- gcc/testsuite/gcc.target/i386/pr53533-1.c | 4 +++- gcc/testsuite/gcc.target/i386/pr53533-3.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr53533-1.c b/gcc/testsuite/gcc.target/i386/pr53533-1.c index 095de665366..11d12015145 100644 --- a/gcc/testsuite/gcc.target/i386/pr53533-1.c +++ b/gcc/testsuite/gcc.target/i386/pr53533-1.c @@ -1,7 +1,9 @@ /* { dg-do compile } */ /* { dg-options "-O1" } */ /* { dg-final { scan-assembler-times "imull\[ \t\]" "1" } } */ -/* { dg-final { scan-assembler-times "(?:addl|subl)\[ \t\]" "1" { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler-times "subl\[ \t\]" "1" } } */ +/* { dg-final { scan-assembler-times "add(?:l|q)\[ \t\]" "1" } } */ +/* { dg-final { scan-assembler-not "leal" } } */ void __attribute__((noipa)) diff --git a/gcc/testsuite/gcc.target/i386/pr53533-3.c b/gcc/testsuite/gcc.target/i386/pr53533-3.c index 3b260d134e9..347fa828eb7 100644 --- a/gcc/testsuite/gcc.target/i386/pr53533-3.c +++ b/gcc/testsuite/gcc.target/i386/pr53533-3.c @@ -1,7 +1,9 @@ /* { dg-do compile } */ /* { dg-options "-O1 -fwrapv" } */ /* { dg-final { scan-assembler-times "imull\[ \t\]" "1" } } */ -/* { dg-final { scan-assembler-times "(?:addl|subl)\[ \t\]" "1" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "subl\[ \t\]" "1" } } */ +/* { dg-final { scan-assembler-times "add(?:l|q)\[ \t\]" "1" } } */ +/* { dg-final { scan-assembler-not "leal" } } */ void __attribute__((noipa)) -- 2.47.0