[llvm] r257625 - AMDGPU/SI: Fix a GPU hang with POS_W_FLOAT enabled
Marek Olsak via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 13 09:23:20 PST 2016
Author: mareko
Date: Wed Jan 13 11:23:20 2016
New Revision: 257625
URL: http://llvm.org/viewvc/llvm-project?rev=257625&view=rev
Log:
AMDGPU/SI: Fix a GPU hang with POS_W_FLOAT enabled
Reviewers: tstellarAMD, arsenm
Subscribers: arsenm
Differential Revision: http://reviews.llvm.org/D16037
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/ret.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=257625&r1=257624&r2=257625&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Jan 13 11:23:20 2016
@@ -646,8 +646,15 @@ SDValue SITargetLowering::LowerFormalArg
// based on run-time states. Since we can't know what the final PSInputEna
// will look like, so we shouldn't do anything here and the user should take
// responsibility for the correct programming.
+ //
+ // Otherwise, the following restrictions apply:
+ // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+ // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+ // enabled too.
if (Info->getShaderType() == ShaderType::PIXEL &&
- (Info->getPSInputAddr() & 0x7F) == 0) {
+ ((Info->getPSInputAddr() & 0x7F) == 0 ||
+ ((Info->getPSInputAddr() & 0xF) == 0 &&
+ Info->isPSInputAllocated(11)))) {
CCInfo.AllocateReg(AMDGPU::VGPR0);
CCInfo.AllocateReg(AMDGPU::VGPR1);
Info->markPSInputAllocated(0);
Modified: llvm/trunk/test/CodeGen/AMDGPU/ret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ret.ll?rev=257625&r1=257624&r2=257625&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ret.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ret.ll Wed Jan 13 11:23:20 2016
@@ -77,6 +77,23 @@ define float @ps_input_ena_no_inputs([9
; GCN: .long 165580
+; GCN-NEXT: .long 2081
+; GCN-NEXT: .long 165584
+; GCN-NEXT: .long 2081
+; GCN-LABEL: {{^}}ps_input_ena_pos_w:
+; GCN-DAG: v_mov_b32_e32 v0, v4
+; GCN-DAG: v_mov_b32_e32 v1, v2
+; GCN: v_mov_b32_e32 v2, v3
+; GCN-NOT: s_endpgm
+define {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
+ %f = bitcast <2 x i32> %8 to <2 x float>
+ %s = insertvalue {float, <2 x float>} undef, float %14, 0
+ %s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
+ ret {float, <2 x float>} %s1
+}
+
+
+; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 563
More information about the llvm-commits
mailing list