[llvm] r333362 - [AMDGPU] Fixed WWM bug in block otherwise entirely in WQM
Tim Renouf via llvm-commits
llvm-commits at lists.llvm.org
Sun May 27 10:26:11 PDT 2018
Author: tpr
Date: Sun May 27 10:26:11 2018
New Revision: 333362
URL: http://llvm.org/viewvc/llvm-project?rev=333362&view=rev
Log:
[AMDGPU] Fixed WWM bug in block otherwise entirely in WQM
Summary:
For a block with WQM on entry and exit and containing no exact mode
code, but containing some WWM code, the WQM pass forgot to process the
block at all and so did not insert code to enter and leave WWM.
This commit fixes that.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D47027
Change-Id: I044792eead1293bed4203fb26ce75f47878afeb6
Modified:
llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp?rev=333362&r1=333361&r2=333362&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp Sun May 27 10:26:11 2018
@@ -453,6 +453,11 @@ void SIWholeQuadMode::propagateInstructi
if (II.Needs != 0)
markInstructionUses(MI, II.Needs, Worklist);
+
+ // Ensure we process a block containing WWM, even if it does not require any
+ // WQM transitions.
+ if (II.Needs & StateWWM)
+ BI.Needs |= StateWWM;
}
void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
Modified: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=333362&r1=333361&r2=333362&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll Sun May 27 10:26:11 2018
@@ -746,6 +746,36 @@ end:
ret <4 x float> %r
}
+; Check a case of a block being entirely WQM except for a bit of WWM.
+; There was a bug where it forgot to enter and leave WWM.
+;
+;CHECK-LABEL: {{^}}test_wwm_within_wqm:
+;CHECK: %IF
+;CHECK: s_or_saveexec_b64 {{.*}}, -1
+;CHECK: ds_swizzle
+;
+define amdgpu_ps float @test_wwm_within_wqm(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data) {
+main_body:
+ %c.bc = bitcast i32 %c to float
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %cmp = icmp eq i32 %z, 0
+ br i1 %cmp, label %IF, label %ENDIF
+
+IF:
+ %dataf = extractelement <4 x float> %dtex, i32 0
+ %data1 = fptosi float %dataf to i32
+ %data2 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %data1, i32 0)
+ %data3 = call i32 @llvm.amdgcn.ds.swizzle(i32 %data2, i32 2079)
+ %data4 = call i32 @llvm.amdgcn.wwm.i32(i32 %data3)
+ %data4f = sitofp i32 %data4 to float
+ br label %ENDIF
+
+ENDIF:
+ %r = phi float [ %data4f, %IF ], [ 0.0, %main_body ]
+ ret float %r
+}
+
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #2
@@ -767,6 +797,7 @@ declare <2 x half> @llvm.amdgcn.cvt.pkrt
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
+declare i32 @llvm.amdgcn.ds.swizzle(i32, i32)
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }
More information about the llvm-commits
mailing list