[llvm-branch-commits] [llvm] [AMDGPU] DPP wave reduction for long types - 2 (PR #189225)

Juan Manuel Martinez CaamaƱo via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Apr 14 01:09:38 PDT 2026


================
@@ -6311,40 +6316,81 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
           DPPInstr.addImm(SISrcMods::NONE); // src1 modifier
         if (!NeedsMovDPP)
           DPPInstr.addReg(Src); // src1
+        if (AMDGPU::getNamedOperandIdx(DPPOpc, AMDGPU::OpName::clamp) >= 0)
+          DPPInstr.addImm(0); // clamp
         DPPInstr
             .addImm(DPPCtrl) // dpp-ctrl
             .addImm(0xf)     // row-mask
             .addImm(0xf)     // bank-mask
             .addImm(0);      // bound-control
       };
-      auto BuildClampInstr = [&](Register Dst, Register Src0, Register Src1) {
-        auto ClampInstr = BuildMI(*CurrBB, MI, DL, TII->get(ClampOpc), Dst);
+      auto BuildClampInstr = [&](Register Dst, Register Src0, Register Src1,
+                                 bool isAddSub = false,
+                                 bool needsCarryIn = false,
+                                 Register CarryIn = Register()) {
+        unsigned InstrOpc = ClampOpc;
+        Register CarryOutReg = MRI.createVirtualRegister(WaveMaskRegClass);
+        if (needsCarryIn)
+          InstrOpc = AMDGPU::V_ADDC_U32_e64;
+        auto ClampInstr = BuildMI(*CurrBB, MI, DL, TII->get(InstrOpc), Dst);
         if (isFPOp)
           ClampInstr.addImm(SISrcMods::NONE); // src0 mod
+        if (isAddSub) {
+          if (needsCarryIn)
+            ClampInstr.addReg(CarryOutReg,
+                              RegState::Define |
+                                  RegState::Dead); // killed carry-out reg
+          else
+            ClampInstr.addReg(CarryOutReg, RegState::Define); // carry-out reg
+        }
         ClampInstr.addReg(Src0);              // src0
         if (isFPOp)
           ClampInstr.addImm(SISrcMods::NONE); // src1 mod
         ClampInstr.addReg(Src1);              // src1
-        if (TII->hasIntClamp(*ClampInstr) || TII->hasFPClamp(*ClampInstr))
+        if (needsCarryIn)
+          ClampInstr.addReg(CarryIn, RegState::Kill); // carry-in reg
+        if (AMDGPU::getNamedOperandIdx(InstrOpc, AMDGPU::OpName::clamp) >= 0)
           ClampInstr.addImm(0); // clamp
         if (isFPOp)
           ClampInstr.addImm(0); // omod
         LastBcastInstr = ClampInstr;
+        return CarryOutReg;
       };
       auto BuildPostDPPInstr = [&](Register Src0, Register Src1) {
-        Register CmpMaskReg = MRI.createVirtualRegister(WaveMaskRegClass);
-        Register MinMaxResultReg = MRI.createVirtualRegister(SrcRegClass);
-        BuildMI(*CurrBB, MI, DL, TII->get(Opc), CmpMaskReg)
-            .addReg(Src0)  // src0
-            .addReg(Src1); // src1
-        LastBcastInstr =
-            BuildMI(*CurrBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B64_PSEUDO),
-                    MinMaxResultReg)
-                .addReg(Src1)        // src0
-                .addReg(Src0)        // src1
-                .addReg(CmpMaskReg); // src2
-        expand64BitV_CNDMASK(*LastBcastInstr, CurrBB);
-        return MinMaxResultReg;
+        bool isAddSubOpc =
+            Opc == AMDGPU::S_ADD_U64_PSEUDO || Opc == AMDGPU::S_SUB_U64_PSEUDO;
+        Register ReturnReg = MRI.createVirtualRegister(SrcRegClass);
+        if (isAddSubOpc) {
+          Register ResLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+          Register ResHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+          MachineOperand Src0Operand =
+              MachineOperand::CreateReg(Src0, /*isDef=*/false);
+          MachineOperand Src1Operand =
+              MachineOperand::CreateReg(Src1, /*isDef=*/false);
+          auto [Src0Lo, Src0Hi] =
+              ExtractSubRegs(MI, Src0Operand, SrcRegClass, ST, MRI);
+          auto [Src1Lo, Src1Hi] =
+              ExtractSubRegs(MI, Src1Operand, SrcRegClass, ST, MRI);
+          Register CarryReg = BuildClampInstr(
+              ResLo, Src0Lo, Src1Lo, isAddSubOpc, /*needsCarryIn*/ false);
+          BuildClampInstr(ResHi, Src0Hi, Src1Hi, isAddSubOpc,
+                          /*needsCarryIn*/ isAddSubOpc ? true : false,
----------------
jmmartinez wrote:

```suggestion
                          /*needsCarryIn*/ isAddSubOpc,
```

https://github.com/llvm/llvm-project/pull/189225


More information about the llvm-branch-commits mailing list