On Wed, May 24, 2023 at 12:13 PM Richard Biener <rguent...@suse.de> wrote: > > The following dispatches to V2DImode CTOR expansion instead of > using sets of (subreg:DI (reg:V16QI 146) [08]) which causes > LRA to spill DImode and reload V16QImode. The same applies for > V8QImode or V4HImode construction from SImode parts which happens > during 32bit libgcc build. > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > OK? > > Thanks, > Richard. > > PR target/109944 > * config/i386/i386-expand.cc (ix86_expand_vector_init_general): > Perform final vector composition using > ix86_expand_vector_init_general instead of setting > the highpart and lowpart which causes spilling. > > * gcc.target/i386/pr109944-1.c: New testcase. > * gcc.target/i386/pr109944-2.c: Likewise.
OK. Thanks, Uros. > --- > gcc/config/i386/i386-expand.cc | 11 ++++---- > gcc/testsuite/gcc.target/i386/pr109944-1.c | 30 ++++++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr109944-2.c | 17 ++++++++++++ > 3 files changed, 53 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr109944-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr109944-2.c > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc > index ff3d382f1b4..19acd9c01f9 100644 > --- a/gcc/config/i386/i386-expand.cc > +++ b/gcc/config/i386/i386-expand.cc > @@ -16367,11 +16367,12 @@ quarter: > emit_move_insn (target, gen_lowpart (mode, words[0])); > else if (n_words == 2) > { > - rtx tmp = gen_reg_rtx (mode); > - emit_clobber (tmp); > - emit_move_insn (gen_lowpart (tmp_mode, tmp), words[0]); > - emit_move_insn (gen_highpart (tmp_mode, tmp), words[1]); > - emit_move_insn (target, tmp); > + gcc_assert (tmp_mode == DImode || tmp_mode == SImode); > + machine_mode concat_mode = tmp_mode == DImode ? 
V2DImode : V2SImode; > + rtx tmp = gen_reg_rtx (concat_mode); > + vals = gen_rtx_PARALLEL (concat_mode, gen_rtvec_v (2, words)); > + ix86_expand_vector_init_general (false, concat_mode, tmp, vals); > + emit_move_insn (target, gen_lowpart (mode, tmp)); > } > else if (n_words == 4) > { > diff --git a/gcc/testsuite/gcc.target/i386/pr109944-1.c > b/gcc/testsuite/gcc.target/i386/pr109944-1.c > new file mode 100644 > index 00000000000..d82214d9ebc > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr109944-1.c > @@ -0,0 +1,30 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +void foo (char * __restrict a, char *b) > +{ > + a[0] = b[0]; > + a[1] = b[16]; > + a[2] = b[32]; > + a[3] = b[48]; > + a[4] = b[64]; > + a[5] = b[80]; > + a[6] = b[96]; > + a[7] = b[112]; > + a[8] = b[128]; > + a[9] = b[144]; > + a[10] = b[160]; > + a[11] = b[176]; > + a[12] = b[192]; > + a[13] = b[208]; > + a[14] = b[224]; > + a[15] = b[240]; > +} > + > +/* We do not want to generate a spill/reload for when the store is > vectorized. > + movq %rdx, -24(%rsp) > +... > + movq %rax, -16(%rsp) > + movdqa -24(%rsp), %xmm0 > + movups %xmm0, (%rdi) */ > +/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr109944-2.c > b/gcc/testsuite/gcc.target/i386/pr109944-2.c > new file mode 100644 > index 00000000000..318dfab0250 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr109944-2.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msse2" } */ > + > +typedef char v16qi __attribute__((vector_size(16))); > +v16qi foo (char *b) > +{ > + return (v16qi){ b[0], b[16], b[32], b[48], b[64], b[80], b[96], b[112], > + b[128], b[144], b[160], b[176], b[192], b[208], b[224], b[240] }; > +} > + > +/* We do not want to generate a spill/reload > + movq %rdx, -24(%rsp) > +... 
> + movq %rax, -16(%rsp) > + movdqa -24(%rsp), %xmm0 > + movups %xmm0, (%rdi) */ > +/* { dg-final { scan-assembler-not "movdq\[^\r\n\]*\[bs\]p\\\), %xmm" } } */ > -- > 2.35.3