Unnecessary moves around dpadd and dpsub are caused by different pseudos being assigned to the input-output operands which correspond to the same register.
Just like for the MSA multiply-accumulate instructions, this forces the same pseudo to the input-output operands, which removes unnecessary moves. Tested on mips-mti-linux-gnu. gcc/ChangeLog: * gcc/config/mips/mips.c (mips_expand_builtin_insn): Operands of DSP multiply-accumulate instructions which correspond to the same input-output register now have the same pseudo assigned to them. gcc/testsuite/ChangeLog: * gcc/testsuite/gcc.target/mips/mac-zero-reload.c: New test. --- gcc/config/mips/mips.c | 24 +++++++++++++++++++ gcc/testsuite/gcc.target/mips/mac-zero-reload.c | 32 +++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 gcc/testsuite/gcc.target/mips/mac-zero-reload.c diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index e337b82..3aa2c11 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -16994,6 +16994,30 @@ mips_expand_builtin_insn (enum insn_code icode, unsigned int nops, case CODE_FOR_msa_dpsub_u_w: case CODE_FOR_msa_dpsub_u_h: case CODE_FOR_msa_dpsub_u_d: + + case CODE_FOR_mips_dpau_h_qbl: + case CODE_FOR_mips_dpau_h_qbr: + case CODE_FOR_mips_dpsu_h_qbl: + case CODE_FOR_mips_dpsu_h_qbr: + case CODE_FOR_mips_dpaq_s_w_ph: + case CODE_FOR_mips_dpsq_s_w_ph: + case CODE_FOR_mips_mulsaq_s_w_ph: + case CODE_FOR_mips_dpaq_sa_l_w: + case CODE_FOR_mips_dpsq_sa_l_w: + case CODE_FOR_mips_maq_s_w_phl: + case CODE_FOR_mips_maq_s_w_phr: + case CODE_FOR_mips_maq_sa_w_phl: + case CODE_FOR_mips_maq_sa_w_phr: + + case CODE_FOR_mips_dpa_w_ph: + case CODE_FOR_mips_dps_w_ph: + case CODE_FOR_mips_mulsa_w_ph: + case CODE_FOR_mips_dpax_w_ph: + case CODE_FOR_mips_dpsx_w_ph: + case CODE_FOR_mips_dpaqx_s_w_ph: + case CODE_FOR_mips_dpaqx_sa_w_ph: + case CODE_FOR_mips_dpsqx_s_w_ph: + case CODE_FOR_mips_dpsqx_sa_w_ph: /* Force the operands which correspond to the same in-out register to have the same pseudo assigned to them. If the input operand is not REG, create one for it. 
*/ diff --git a/gcc/testsuite/gcc.target/mips/mac-zero-reload.c b/gcc/testsuite/gcc.target/mips/mac-zero-reload.c new file mode 100644 index 0000000..a70dfb5 --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/mac-zero-reload.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-fno-unroll-loops -mgp32 -mdspr2" } */ +/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */ +/* { dg-final { scan-assembler-not "\tmflo\t" } } */ +/* { dg-final { scan-assembler-not "\tmfhi\t" } } */ +/* { dg-final { scan-assembler-not "\tmtlo\t" } } */ +/* { dg-final { scan-assembler-not "\tmthi\t" } } */ + +typedef short v2i16 __attribute__ ((vector_size(4))); + +extern v2i16 ps32Ptrl[4096]; + +extern int sink[4096]; + +int main(void) +{ + v2i16 v2i16_h0; + long long s64Acc; + + for (int i = 0; i < 4; ++i) + { + v2i16_h0 = ps32Ptrl[i]; + + s64Acc = 0; + + s64Acc = __builtin_mips_dpa_w_ph(s64Acc, v2i16_h0, v2i16_h0); + + sink[i] = __builtin_mips_extr_rs_w(s64Acc, 0); + } + + return 0; +} -- 2.7.4