Hi! The following patch attempts to improve find_split_point inside of complex MEM addresses, if the target supports REG + REG + const addressing, but doesn't support more_complex_rtx + REG + const, try to split it at more_complex_rtx rather than more_complex_rtx + REG.
On the original testcase from the PR, the change is with x86_64 -m64 -O2: movzbl (%rdi), %eax - addq $1, %rax salq $4, %rax - movl 8(%rsp,%rax), %eax + movl 24(%rsp,%rax), %eax movl %eax, (%rsi) ret and -m32 -O2: movl 4116(%esp), %eax movzbl (%eax), %eax - addl $1, %eax sall $4, %eax - movl 4(%esp,%eax), %edx + movl 20(%esp,%eax), %edx movl 4120(%esp), %eax movl %edx, (%eax) ret (though, that testcase not included, e.g. because trying to optimize a single insn in a code that passes around more than 4KB structure as argument by value is pointless). Bootstrapped/regtested on x86_64-linux and i686-linux, bootstrapped on powerpc64{,le}-linux (regtest still pending on those two), ok for trunk? 2018-11-29 Jakub Jelinek <ja...@redhat.com> PR target/54589 * combine.c (find_split_point): For invalid memory address nonobj + obj + const, if reg + obj + const is valid addressing mode, split at nonobj. Use if rather than else if for the fallback. Comment fixes. * gcc.target/i386/pr54589.c: New test. --- gcc/combine.c.jj 2018-11-21 19:57:26.229726485 +0100 +++ gcc/combine.c 2018-11-29 17:57:48.069423874 +0100 @@ -4945,7 +4945,7 @@ find_split_point (rtx *loc, rtx_insn *in } /* If we have a PLUS whose second operand is a constant and the - address is not valid, perhaps will can split it up using + address is not valid, perhaps we can split it up using the machine-specific way to split large constants. We use the first pseudo-reg (one of the virtual regs) as a placeholder; it will not remain in the result. */ @@ -4960,7 +4960,7 @@ find_split_point (rtx *loc, rtx_insn *in /* This should have produced two insns, each of which sets our placeholder. If the source of the second is a valid address, - we can make put both sources together and make a split point + we can put both sources together and make a split point in the middle. */ if (seq @@ -5001,14 +5001,51 @@ find_split_point (rtx *loc, rtx_insn *in } } + /* If that didn't work and we have a nested plus, like: + ((REG1 * CONST1) + REG2) + CONST2 and (REG1 + REG2) + CONST2 + is valid address, try to split (REG1 * CONST1). */ + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS + && !OBJECT_P (XEXP (XEXP (XEXP (x, 0), 0), 0)) + && OBJECT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) + && ! (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SUBREG + && OBJECT_P (SUBREG_REG (XEXP (XEXP (XEXP (x, 0), + 0), 0))))) + { + rtx tem = XEXP (XEXP (XEXP (x, 0), 0), 0); + XEXP (XEXP (XEXP (x, 0), 0), 0) = reg; + if (memory_address_addr_space_p (GET_MODE (x), XEXP (x, 0), + MEM_ADDR_SPACE (x))) + { + XEXP (XEXP (XEXP (x, 0), 0), 0) = tem; + return &XEXP (XEXP (XEXP (x, 0), 0), 0); + } + XEXP (XEXP (XEXP (x, 0), 0), 0) = tem; + } + else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS + && OBJECT_P (XEXP (XEXP (XEXP (x, 0), 0), 0)) + && !OBJECT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) + && ! (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SUBREG + && OBJECT_P (SUBREG_REG (XEXP (XEXP (XEXP (x, 0), + 0), 1))))) + { + rtx tem = XEXP (XEXP (XEXP (x, 0), 0), 1); + XEXP (XEXP (XEXP (x, 0), 0), 1) = reg; + if (memory_address_addr_space_p (GET_MODE (x), XEXP (x, 0), + MEM_ADDR_SPACE (x))) + { + XEXP (XEXP (XEXP (x, 0), 0), 1) = tem; + return &XEXP (XEXP (XEXP (x, 0), 0), 1); + } + XEXP (XEXP (XEXP (x, 0), 0), 1) = tem; + } + /* If that didn't work, perhaps the first operand is complex and needs to be computed separately, so make a split point there. This will occur on machines that just support REG + CONST and have a constant moved through some previous computation. */ - - else if (!OBJECT_P (XEXP (XEXP (x, 0), 0)) - && ! (GET_CODE (XEXP (XEXP (x, 0), 0)) == SUBREG - && OBJECT_P (SUBREG_REG (XEXP (XEXP (x, 0), 0))))) + if (!OBJECT_P (XEXP (XEXP (x, 0), 0)) + && ! (GET_CODE (XEXP (XEXP (x, 0), 0)) == SUBREG + && OBJECT_P (SUBREG_REG (XEXP (XEXP (x, 0), 0))))) return &XEXP (XEXP (x, 0), 0); } --- gcc/testsuite/gcc.target/i386/pr54589.c.jj 2018-11-29 18:19:45.800619378 +0100 +++ gcc/testsuite/gcc.target/i386/pr54589.c 2018-11-29 18:19:39.034731419 +0100 @@ -0,0 +1,22 @@ +/* PR target/54589 */ +/* { dg-do compile { target { *-*-linux* && lp64 } } } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler "movl\[ \t]+(?:t\\+336\\(%r..\\)|336\\(%r..,%r..\\)), %eax" } } */ +/* { dg-final { scan-assembler "movl\[ \t]+340\\(%r..,%r..\\), %eax" } } */ +/* { dg-final { scan-assembler-times "salq\[^\n\r]*4, %" 2 } } */ +/* { dg-final { scan-assembler-not "addq\[ \t]" } } */ + +struct S { int a, b, c, d; }; +struct T { struct S e[16]; struct S f[1024]; } t; + +int +foo (unsigned long x) +{ + return t.f[x + 5].a; +} + +int +bar (struct T *x, unsigned long y) +{ + return x->f[y + 5].b; +} Jakub