[llvm] [AMDGPU] Add wave reduce intrinsics for double types - 1 (PR #170811)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 13 02:09:18 PST 2025
================
@@ -5968,6 +5958,60 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
.addReg(Accumulator->getOperand(0).getReg());
break;
}
+ case AMDGPU::V_MIN_F64_e64:
+ case AMDGPU::V_MAX_F64_e64: {
+ const TargetRegisterClass *VregRC = TRI->getVGPR64Class();
+ const TargetRegisterClass *VregSubRC =
+ TRI->getSubRegisterClass(VregRC, AMDGPU::sub0);
+ Register AccumulatorVReg = MRI.createVirtualRegister(VregRC);
+ Register DstVreg = MRI.createVirtualRegister(VregRC);
+ Register LaneValLo =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ Register LaneValHi =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*ComputeLoop, I, DL, TII->get(AMDGPU::V_MOV_B64_PSEUDO),
+ AccumulatorVReg)
+ .addReg(Accumulator->getOperand(0).getReg());
+ if (ST.getGeneration() == AMDGPUSubtarget::Generation::GFX12) {
+ switch (Opc) {
+ case AMDGPU::V_MIN_F64_e64:
+ Opc = AMDGPU::V_MIN_NUM_F64_e64;
+ break;
+ case AMDGPU::V_MAX_F64_e64:
+ Opc = AMDGPU::V_MAX_NUM_F64_e64;
+ break;
+ }
----------------
arsenm wrote:
These are not equivalent operations, despite the gfx12 manual claiming "v_min_f64" is merely an alias. They have different sNaN behavior. Do we have a specification for the order in which this reduction occurs?
With v_min_f64 in IEEE=1 mode, `<0, sNaN, 1>` reduced from the left will return 1. With v_min_num_f64, the same reduction will return 0.
https://github.com/llvm/llvm-project/pull/170811
More information about the llvm-commits
mailing list