> gcc/ChangeLog: > > * config/i386/i386-expand.cc (emit_reduc_half): Use shuffles to > generate reduc half for V4SI, similar modes. > * config/i386/i386.h (TARGET_SSE_REDUCTION_PREFER_PSHUF): New Macro. > * config/i386/x86-tune.def (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF): > New tuning. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/reduc-pshuf.c: New test. > --- > gcc/config/i386/i386-expand.cc | 28 ++++++++++++++++++--- > gcc/config/i386/i386.h | 2 ++ > gcc/config/i386/x86-tune.def | 5 ++++ > gcc/testsuite/gcc.target/i386/reduc-pshuf.c | 14 +++++++++++ > 4 files changed, 46 insertions(+), 3 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/reduc-pshuf.c > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc > index 7fd03c88630..c7aec716a55 100644 > --- a/gcc/config/i386/i386-expand.cc > +++ b/gcc/config/i386/i386-expand.cc > @@ -18724,9 +18724,31 @@ emit_reduc_half (rtx dest, rtx src, int i) > case E_V8HFmode: > case E_V4SImode: > case E_V2DImode: > - d = gen_reg_rtx (V1TImode); > - tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src), > - GEN_INT (i / 2)); > + if (TARGET_SSE_REDUCTION_PREFER_PSHUF) { > + if (i == 128) { > + d = gen_reg_rtx(V4SImode); > + tem = gen_sse2_pshufd_1( > + d, force_reg(V4SImode, gen_lowpart(V4SImode, src)), GEN_INT(2), > + GEN_INT(3), GEN_INT(2), GEN_INT(3)); > + } else if (i == 64) { > + d = gen_reg_rtx(V4SImode); > + tem = gen_sse2_pshufd_1( > + d, force_reg(V4SImode, gen_lowpart(V4SImode, src)), GEN_INT(1), > + GEN_INT(1), GEN_INT(1), GEN_INT(1)); > + } else if (i == 32) { > + d = gen_reg_rtx(V8HImode); > + tem = gen_sse2_pshuflw_1( > + d, force_reg(V8HImode, gen_lowpart(V8HImode, src)), GEN_INT(1), > + GEN_INT(1), GEN_INT(1), GEN_INT(1)); > + } else { > + d = gen_reg_rtx(V1TImode); > + tem = > + gen_sse2_lshrv1ti3(d, gen_lowpart(V1TImode, src), GEN_INT(i / > 2)); > + } > + } else { > + d = gen_reg_rtx(V1TImode); > + tem = gen_sse2_lshrv1ti3(d, gen_lowpart(V1TImode, src), GEN_INT(i / > 
2));
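Instead of duplicating the gen_sse2_lshrv1ti3 call, it is probably cleaner to simply break after each gen_sse2_pshufd_1 / gen_sse2_pshuflw_1 call and remove the else branches, so that the remaining cases fall through to the single existing V1TImode shift.

OK with that change.

Honza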