================
@@ -5189,10 +5196,54 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
&MI,
.addReg(NewAccumulator->getOperand(0).getReg())
.addImm(1)
.setOperandDead(3); // Dead scc
- BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
- .addReg(SrcReg)
- .addReg(ParityRegister);
- break;
+ if (is32BitOpc) {
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
+ .addReg(SrcReg)
+ .addReg(ParityRegister);
+ break;
+ } else {
+ Register DestSub0 =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register DestSub1 =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register Op1H_Op0L_Reg =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register CarryReg =
----------------
jmmartinez wrote:
> Re: s_bfe_i32 : either ways I will be emitting 2 instrs. I can try this tho.
Don't worry. If it was a `v_mul` there could be a difference according to the
sched-model, but I just checked and it seems to be the same for all scalar
operations.
https://github.com/llvm/llvm-project/pull/151310
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits