[llvm] r301197 - AMDGPU: Move v_readlane lane select from VGPR to SGPR
Nicolai Haehnle via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 24 10:17:37 PDT 2017
Author: nha
Date: Mon Apr 24 12:17:36 2017
New Revision: 301197
URL: http://llvm.org/viewvc/llvm-project?rev=301197&view=rev
Log:
AMDGPU: Move v_readlane lane select from VGPR to SGPR
Summary:
Fix a compiler bug when the lane select happens to end up in a VGPR.
Clarify the semantics of the corresponding intrinsic to match the
corresponding GLSL semantics: the lane select must be uniform across a
wavefront; otherwise, the results are undefined.
Reviewers: arsenm
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D32343
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=301197&r1=301196&r2=301197&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Mon Apr 24 12:17:36 2017
@@ -629,6 +629,8 @@ def int_amdgcn_readfirstlane :
GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+// The lane argument must be uniform across the currently active threads of the
+// current wave. Otherwise, the result is undefined.
def int_amdgcn_readlane :
GCCBuiltin<"__builtin_amdgcn_readlane">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=301197&r1=301196&r2=301197&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Mon Apr 24 12:17:36 2017
@@ -2640,6 +2640,19 @@ void SIInstrInfo::legalizeOperandsVOP2(M
if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
return;
+ // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for
+ // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane
+ // select is uniform.
+ if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
+ RI.isVGPR(MRI, Src1.getReg())) {
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ const DebugLoc &DL = MI.getDebugLoc();
+ BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+ .add(Src1);
+ Src1.ChangeToRegister(Reg, false);
+ return;
+ }
+
// We do not use commuteInstruction here because it is too aggressive and will
// commute if it is possible. We only want to commute here if it improves
// legality. This can be called a fairly large number of times so don't waste
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll?rev=301197&r1=301196&r2=301197&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll Mon Apr 24 12:17:36 2017
@@ -19,6 +19,20 @@ define amdgpu_kernel void @test_readlane
ret void
}
+; CHECK-LABEL: {{^}}test_readlane_vregs:
+; CHECK: v_readfirstlane_b32 [[LANE:s[0-9]+]], v{{[0-9]+}}
+; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, [[LANE]]
+define amdgpu_kernel void @test_readlane_vregs(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep.in = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid
+ %args = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in
+ %value = extractelement <2 x i32> %args, i32 0
+ %lane = extractelement <2 x i32> %args, i32 1
+ %readlane = call i32 @llvm.amdgcn.readlane(i32 %value, i32 %lane)
+ store i32 %readlane, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
; TODO: m0 should be folded.
; CHECK-LABEL: {{^}}test_readlane_m0_sreg:
; CHECK: s_mov_b32 m0, -1
@@ -40,5 +54,8 @@ define amdgpu_kernel void @test_readlane
ret void
}
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+
attributes #0 = { nounwind readnone convergent }
attributes #1 = { nounwind }
+attributes #2 = { nounwind readnone }
More information about the llvm-commits
mailing list