On Wed, May 24, 2023 at 12:13 PM Richard Biener <rguent...@suse.de> wrote:
>
> The following dispatches to V2DImode CTOR expansion instead of
> using sets of (subreg:DI (reg:V16QI 146) [08]) which causes
> LRA to spill DImode and reload V16QImode.  The same applies for
> V8QImode or V4HImode construction from SImode parts which happens
> during 32bit libgcc build.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> OK?
>
> Thanks,
> Richard.
>
>         PR target/109944
>         * config/i386/i386-expand.cc (ix86_expand_vector_init_general):
>         Perform final vector composition using
>         ix86_expand_vector_init_general instead of setting
>         the highpart and lowpart which causes spilling.
>
>         * gcc.target/i386/pr109944-1.c: New testcase.
>         * gcc.target/i386/pr109944-2.c: Likewise.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-expand.cc             | 11 ++++----
>  gcc/testsuite/gcc.target/i386/pr109944-1.c | 30 ++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr109944-2.c | 17 ++++++++++++
>  3 files changed, 53 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109944-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109944-2.c
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index ff3d382f1b4..19acd9c01f9 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -16367,11 +16367,12 @@ quarter:
>         emit_move_insn (target, gen_lowpart (mode, words[0]));
>        else if (n_words == 2)
>         {
> -         rtx tmp = gen_reg_rtx (mode);
> -         emit_clobber (tmp);
> -         emit_move_insn (gen_lowpart (tmp_mode, tmp), words[0]);
> -         emit_move_insn (gen_highpart (tmp_mode, tmp), words[1]);
> -         emit_move_insn (target, tmp);
> +         gcc_assert (tmp_mode == DImode || tmp_mode == SImode);
> +         machine_mode concat_mode = tmp_mode == DImode ? V2DImode : V2SImode;
> +         rtx tmp = gen_reg_rtx (concat_mode);
> +         vals = gen_rtx_PARALLEL (concat_mode, gen_rtvec_v (2, words));
> +         ix86_expand_vector_init_general (false, concat_mode, tmp, vals);
> +         emit_move_insn (target, gen_lowpart (mode, tmp));
>         }
>        else if (n_words == 4)
>         {
> diff --git a/gcc/testsuite/gcc.target/i386/pr109944-1.c b/gcc/testsuite/gcc.target/i386/pr109944-1.c
> new file mode 100644
> index 00000000000..d82214d9ebc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109944-1.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void foo (char * __restrict a, char *b)
> +{
> +  a[0] = b[0];
> +  a[1] = b[16];
> +  a[2] = b[32];
> +  a[3] = b[48];
> +  a[4] = b[64];
> +  a[5] = b[80];
> +  a[6] = b[96];
> +  a[7] = b[112];
> +  a[8] = b[128];
> +  a[9] = b[144];
> +  a[10] = b[160];
> +  a[11] = b[176];
> +  a[12] = b[192];
> +  a[13] = b[208];
> +  a[14] = b[224];
> +  a[15] = b[240];
> +}
> +
> +/* We do not want to generate a spill/reload for when the store is vectorized.
> +        movq    %rdx, -24(%rsp)
> +...
> +        movq    %rax, -16(%rsp)
> +        movdqa  -24(%rsp), %xmm0
> +        movups  %xmm0, (%rdi)  */
> +/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr109944-2.c b/gcc/testsuite/gcc.target/i386/pr109944-2.c
> new file mode 100644
> index 00000000000..318dfab0250
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109944-2.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2" } */
> +
> +typedef char v16qi __attribute__((vector_size(16)));
> +v16qi foo (char *b)
> +{
> +  return (v16qi){ b[0], b[16], b[32], b[48], b[64], b[80], b[96], b[112],
> +      b[128], b[144], b[160], b[176], b[192], b[208], b[224], b[240] };
> +}
> +
> +/* We do not want to generate a spill/reload
> +        movq    %rdx, -24(%rsp)
> +...
> +        movq    %rax, -16(%rsp)
> +        movdqa  -24(%rsp), %xmm0
> +        movups  %xmm0, (%rdi)  */
> +/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */
> --
> 2.35.3

Reply via email to