[llvm-branch-commits] [llvm] [AMDGPU] Add wave reduce intrinsics for double types - 2 (PR #170812)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jan 8 13:20:09 PST 2026
================
@@ -5759,28 +5766,70 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
break;
}
case AMDGPU::V_ADD_F32_e64:
- case AMDGPU::V_SUB_F32_e64: {
- Register ActiveLanesVreg =
- MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- Register DstVreg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ case AMDGPU::V_ADD_F64_e64:
+ case AMDGPU::V_SUB_F32_e64:
+ case AMDGPU::V_SUB_F16_e64: {
+ bool is32BitOpc = is32bitWaveReduceOperation(Opc);
+ const TargetRegisterClass *VregRC =
+ is32BitOpc ? &AMDGPU::VGPR_32RegClass : TRI->getVGPR64Class();
+ Register ActiveLanesVreg = MRI.createVirtualRegister(VregRC);
+ Register DstVreg = MRI.createVirtualRegister(VregRC);
// Get number of active lanes as a float val.
- BuildMI(BB, MI, DL, TII->get(AMDGPU::V_CVT_F32_I32_e64),
+ BuildMI(BB, MI, DL,
+ TII->get(is32BitOpc ? AMDGPU::V_CVT_F32_I32_e64
+ : AMDGPU::V_CVT_F64_I32_e64),
ActiveLanesVreg)
.addReg(NewAccumulator->getOperand(0).getReg())
.addImm(0) // clamp
.addImm(0); // output-modifier
// Take negation of input for SUB reduction
- unsigned srcMod = Opc == AMDGPU::V_SUB_F32_e64 ? 1 : 0;
- BuildMI(BB, MI, DL, TII->get(AMDGPU::V_MUL_F32_e64), DstVreg)
- .addImm(srcMod) // src0 modifier
- .addReg(SrcReg)
- .addImm(0) // src1 modifier
- .addReg(ActiveLanesVreg)
- .addImm(0) // clamp
- .addImm(0); // output-mod
- BuildMI(BB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
- .addReg(DstVreg);
+ unsigned srcMod =
+ Opc == AMDGPU::V_SUB_F32_e64 || Opc == AMDGPU::V_SUB_F16_e64 ? 1
+ : 0;
----------------
arsenm wrote:
This should use the source-modifier enums (e.g. `SISrcMods::NEG` / `SISrcMods::NONE`) for `srcMod` instead of the raw literals 1 and 0.
https://github.com/llvm/llvm-project/pull/170812
More information about the llvm-branch-commits mailing list