[llvm] r297556 - AMDGPU: Keep track of modifiers when converting v_mac to v_mad
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 10 21:40:40 PST 2017
Author: arsenm
Date: Fri Mar 10 23:40:40 2017
New Revision: 297556
URL: http://llvm.org/viewvc/llvm-project?rev=297556&view=rev
Log:
AMDGPU: Keep track of modifiers when converting v_mac to v_mad
Since v_max_f32_e64/v_max_f16_e64 can be folded into the target
instruction as a clamp modifier if that instruction supports the
clamp bit, we also need to preserve the modifiers (source modifiers,
clamp and omod) when converting v_mac to v_mad.
This fixes a rendering issue in Dirt Rally, where a v_mac
instruction with the clamp bit set was converted to a v_mad
and that bit was lost during the conversion.
Fixes: e184e01dd79 ("AMDGPU: Fold FP clamp as modifier bit")
Patch by Samuel Pitoiset <samuel.pitoiset at gmail.com>
Modified:
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/clamp-modifier.ll
llvm/trunk/test/CodeGen/AMDGPU/omod.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=297556&r1=297555&r2=297556&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Fri Mar 10 23:40:40 2017
@@ -1766,20 +1766,26 @@ MachineInstr *SIInstrInfo::convertToThre
const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src0Mods =
+ getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+ const MachineOperand *Src1Mods =
+ getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+ const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
+ const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
return BuildMI(*MBB, MI, MI.getDebugLoc(),
get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
.add(*Dst)
- .addImm(0) // Src0 mods
+ .addImm(Src0Mods ? Src0Mods->getImm() : 0)
.add(*Src0)
- .addImm(0) // Src1 mods
+ .addImm(Src1Mods ? Src1Mods->getImm() : 0)
.add(*Src1)
.addImm(0) // Src mods
.add(*Src2)
- .addImm(0) // clamp
- .addImm(0); // omod
+ .addImm(Clamp ? Clamp->getImm() : 0)
+ .addImm(Omod ? Omod->getImm() : 0);
}
// It's not generally safe to move VALU instructions across these since it will
Modified: llvm/trunk/test/CodeGen/AMDGPU/clamp-modifier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/clamp-modifier.ll?rev=297556&r1=297555&r2=297556&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/clamp-modifier.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/clamp-modifier.ll Fri Mar 10 23:40:40 2017
@@ -168,6 +168,23 @@ define amdgpu_kernel void @v_clamp_add_s
ret void
}
+; GCN-LABEL: {{^}}v_clamp_mac_to_mad:
+; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]}} clamp{{$}}
+define amdgpu_kernel void @v_clamp_mac_to_mad(float addrspace(1)* %out, float addrspace(1)* %aptr, float %a) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %b = load float, float addrspace(1)* %gep0
+
+ %mul = fmul float %a, %a
+ %add = fadd float %mul, %b
+ %max = call float @llvm.maxnum.f32(float %add, float 0.0)
+ %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
+ %res = fadd float %clamp, %b
+ store float %res, float addrspace(1)* %out.gep
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.floor.f32(float) #1
Modified: llvm/trunk/test/CodeGen/AMDGPU/omod.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/omod.ll?rev=297556&r1=297555&r2=297556&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/omod.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/omod.ll Fri Mar 10 23:40:40 2017
@@ -250,6 +250,17 @@ define amdgpu_ps void @v_omod_div2_f16_n
ret void
}
+; GCN-LABEL: {{^}}v_omod_mac_to_mad:
+; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}} mul:2{{$}}
+define amdgpu_ps void @v_omod_mac_to_mad(float %b, float %a) #0 {
+ %mul = fmul float %a, %a
+ %add = fadd float %mul, %b
+ %mad = fmul float %add, 2.0
+ %res = fmul float %mad, %b
+ store float %res, float addrspace(1)* undef
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.floor.f32(float) #1
More information about the llvm-commits mailing list