[llvm] r314951 - AMDGPU: Do not fold clamp instructions when sources are different
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 4 17:13:17 PDT 2017
Author: arsenm
Date: Wed Oct 4 17:13:17 2017
New Revision: 314951
URL: http://llvm.org/viewvc/llvm-project?rev=314951&view=rev
Log:
AMDGPU: Do not fold clamp instructions when sources are different
Patch by hakzsam (Samuel Pitoiset)
Modified:
llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/trunk/test/CodeGen/AMDGPU/clamp.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=314951&r1=314950&r2=314951&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Wed Oct 4 17:13:17 2017
@@ -737,6 +737,7 @@ const MachineOperand *SIFoldOperands::is
const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
if (!Src0->isReg() || !Src1->isReg() ||
+ Src0->getReg() != Src1->getReg() ||
Src0->getSubReg() != Src1->getSubReg() ||
Src0->getSubReg() != AMDGPU::NoSubRegister)
return nullptr;
Modified: llvm/trunk/test/CodeGen/AMDGPU/clamp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/clamp.ll?rev=314951&r1=314950&r2=314951&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/clamp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/clamp.ll Wed Oct 4 17:13:17 2017
@@ -663,6 +663,28 @@ define amdgpu_kernel void @v_clamp_v2f16
ret void
}
+; GCN-LABEL: {{^}}v_clamp_diff_source_f32:
+; GCN: v_add_f32_e32 [[A:v[0-9]+]]
+; GCN: v_add_f32_e32 [[B:v[0-9]+]]
+; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[B]] clamp{{$}}
+define amdgpu_kernel void @v_clamp_diff_source_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0
+{
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 0
+ %gep1 = getelementptr float, float addrspace(1)* %aptr, i32 1
+ %gep2 = getelementptr float, float addrspace(1)* %aptr, i32 2
+ %l0 = load float, float addrspace(1)* %gep0
+ %l1 = load float, float addrspace(1)* %gep1
+ %l2 = load float, float addrspace(1)* %gep2
+ %a = fadd nsz float %l0, %l1
+ %b = fadd nsz float %l0, %l2
+ %res = call nsz float @llvm.maxnum.f32(float %a, float %b)
+ %max = call nsz float @llvm.maxnum.f32(float %res, float 0.0)
+ %min = call nsz float @llvm.minnum.f32(float %max, float 1.0)
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 3
+ store float %min, float addrspace(1)* %out.gep
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
More information about the llvm-commits mailing list