[llvm-branch-commits] [llvm] [AMDGPU] DPP wave reduction for long types - 2 (PR #189225)
Juan Manuel Martinez Caamaño via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Apr 14 01:09:38 PDT 2026
================
@@ -6311,40 +6316,81 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
DPPInstr.addImm(SISrcMods::NONE); // src1 modifier
if (!NeedsMovDPP)
DPPInstr.addReg(Src); // src1
+ if (AMDGPU::getNamedOperandIdx(DPPOpc, AMDGPU::OpName::clamp) >= 0)
+ DPPInstr.addImm(0); // clamp
DPPInstr
.addImm(DPPCtrl) // dpp-ctrl
.addImm(0xf) // row-mask
.addImm(0xf) // bank-mask
.addImm(0); // bound-control
};
- auto BuildClampInstr = [&](Register Dst, Register Src0, Register Src1) {
- auto ClampInstr = BuildMI(*CurrBB, MI, DL, TII->get(ClampOpc), Dst);
+ auto BuildClampInstr = [&](Register Dst, Register Src0, Register Src1,
+ bool isAddSub = false,
+ bool needsCarryIn = false,
+ Register CarryIn = Register()) {
+ unsigned InstrOpc = ClampOpc;
+ Register CarryOutReg = MRI.createVirtualRegister(WaveMaskRegClass);
+ if (needsCarryIn)
+ InstrOpc = AMDGPU::V_ADDC_U32_e64;
+ auto ClampInstr = BuildMI(*CurrBB, MI, DL, TII->get(InstrOpc), Dst);
if (isFPOp)
ClampInstr.addImm(SISrcMods::NONE); // src0 mod
+ if (isAddSub) {
+ if (needsCarryIn)
+ ClampInstr.addReg(CarryOutReg,
+ RegState::Define |
+ RegState::Dead); // killed carry-out reg
+ else
+ ClampInstr.addReg(CarryOutReg, RegState::Define); // carry-out reg
+ }
ClampInstr.addReg(Src0); // src0
if (isFPOp)
ClampInstr.addImm(SISrcMods::NONE); // src1 mod
ClampInstr.addReg(Src1); // src1
- if (TII->hasIntClamp(*ClampInstr) || TII->hasFPClamp(*ClampInstr))
+ if (needsCarryIn)
+ ClampInstr.addReg(CarryIn, RegState::Kill); // carry-in reg
+ if (AMDGPU::getNamedOperandIdx(InstrOpc, AMDGPU::OpName::clamp) >= 0)
ClampInstr.addImm(0); // clamp
if (isFPOp)
ClampInstr.addImm(0); // omod
LastBcastInstr = ClampInstr;
+ return CarryOutReg;
};
auto BuildPostDPPInstr = [&](Register Src0, Register Src1) {
- Register CmpMaskReg = MRI.createVirtualRegister(WaveMaskRegClass);
- Register MinMaxResultReg = MRI.createVirtualRegister(SrcRegClass);
- BuildMI(*CurrBB, MI, DL, TII->get(Opc), CmpMaskReg)
- .addReg(Src0) // src0
- .addReg(Src1); // src1
- LastBcastInstr =
- BuildMI(*CurrBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B64_PSEUDO),
- MinMaxResultReg)
- .addReg(Src1) // src0
- .addReg(Src0) // src1
- .addReg(CmpMaskReg); // src2
- expand64BitV_CNDMASK(*LastBcastInstr, CurrBB);
- return MinMaxResultReg;
+ bool isAddSubOpc =
+ Opc == AMDGPU::S_ADD_U64_PSEUDO || Opc == AMDGPU::S_SUB_U64_PSEUDO;
+ Register ReturnReg = MRI.createVirtualRegister(SrcRegClass);
+ if (isAddSubOpc) {
+ Register ResLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register ResHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MachineOperand Src0Operand =
+ MachineOperand::CreateReg(Src0, /*isDef=*/false);
+ MachineOperand Src1Operand =
+ MachineOperand::CreateReg(Src1, /*isDef=*/false);
+ auto [Src0Lo, Src0Hi] =
+ ExtractSubRegs(MI, Src0Operand, SrcRegClass, ST, MRI);
+ auto [Src1Lo, Src1Hi] =
+ ExtractSubRegs(MI, Src1Operand, SrcRegClass, ST, MRI);
+ Register CarryReg = BuildClampInstr(
+ ResLo, Src0Lo, Src1Lo, isAddSubOpc, /*needsCarryIn*/ false);
+ BuildClampInstr(ResHi, Src0Hi, Src1Hi, isAddSubOpc,
+ /*needsCarryIn*/ isAddSubOpc ? true : false,
----------------
jmmartinez wrote:
```suggestion
/*needsCarryIn*/ isAddSubOpc,
```
https://github.com/llvm/llvm-project/pull/189225
More information about the llvm-branch-commits
mailing list