PR109116 exposes an issue where using unspecs to access each vector component of an opaque mode variable leads to unneeded register copies, because our rtl optimizers cannot handle unspecs. Instead, use subregs to access each vector component of the opaque mode variable, which our optimizers know how to handle.
I did not include a test case with the patch, since writing a test case that attempts to ensure we don't emit unneeded register copies is nearly impossible since those copies can still be generated for reasons other than the causes in this patch. I have verified that this patch does improve code generation for some unit tests and our AI libraries team has confirmed that performance of their tests improved when using this patch. This passed bootstrap and regtesting with no regressions on powerpc64le-linux and powerpc64-linux. Ok for trunk? Peter gcc/ PR target/109116 * config/rs6000/mma.md (vsx_disassemble_pair): Expand into a vector register sized subreg. * config/rs6000/mma.md (*vsx_disassemble_pair): Delete. (mma_disassemble_acc): Expand into a vector register sized subreg. (*mma_disassemble_acc): Delete. * config/rs6000/rs6000.cc (rs6000_modes_tieable_p): Allow vector modes to tie with OOmode. diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 575751d477e..2ca405469e2 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -398,29 +398,8 @@ (define_expand "vsx_disassemble_pair" (match_operand 2 "const_0_to_1_operand")] "TARGET_MMA" { - rtx src; - int regoff = INTVAL (operands[2]); - src = gen_rtx_UNSPEC (V16QImode, - gen_rtvec (2, operands[1], GEN_INT (regoff)), - UNSPEC_MMA_EXTRACT); - emit_move_insn (operands[0], src); - DONE; -}) - -(define_insn_and_split "*vsx_disassemble_pair" - [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa") - (unspec:V16QI [(match_operand:OO 1 "vsx_register_operand" "wa") - (match_operand 2 "const_0_to_1_operand")] - UNSPEC_MMA_EXTRACT))] - "TARGET_MMA - && vsx_register_operand (operands[1], OOmode)" - "#" - "&& reload_completed" - [(const_int 0)] -{ - int reg = REGNO (operands[1]); - int regoff = INTVAL (operands[2]); - rtx src = gen_rtx_REG (V16QImode, reg + regoff); + int regoff = INTVAL (operands[2]) * GET_MODE_SIZE (V16QImode); + rtx src = simplify_gen_subreg (V16QImode, operands[1], OOmode, regoff); emit_move_insn (operands[0], src); DONE; }) @@ -472,29 +451,8 @@ (define_expand "mma_disassemble_acc" (match_operand 2 "const_0_to_3_operand")] "TARGET_MMA" { - rtx src; - int regoff = INTVAL (operands[2]); - src = gen_rtx_UNSPEC (V16QImode, - gen_rtvec (2, operands[1], GEN_INT (regoff)), - UNSPEC_MMA_EXTRACT); - emit_move_insn (operands[0], src); - DONE; -}) - -(define_insn_and_split "*mma_disassemble_acc" - [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa") - (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d") - (match_operand 2 "const_0_to_3_operand")] - UNSPEC_MMA_EXTRACT))] - "TARGET_MMA - && fpr_reg_operand (operands[1], XOmode)" - "#" - "&& reload_completed" - [(const_int 0)] -{ - int reg = REGNO (operands[1]); - int regoff = INTVAL (operands[2]); - rtx src = gen_rtx_REG (V16QImode, reg + regoff); + int regoff = INTVAL (operands[2]) * GET_MODE_SIZE (V16QImode); + rtx src = simplify_gen_subreg (V16QImode, operands[1], XOmode, regoff); emit_move_insn (operands[0], src); DONE; }) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 5f56c3ed85b..f2efa46c147 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1964,9 +1964,12 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode) static bool rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2) { - if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode - || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode) - return mode1 == mode2; + if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode + || mode2 == PTImode || mode2 == XOmode) + return mode1 == mode2; + + if (mode2 == OOmode) + return ALTIVEC_OR_VSX_VECTOR_MODE (mode1); if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1)) return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);