[PATCH] D47027: [AMDGPU] Fixed WWM bug in block otherwise entirely in WQM
Tim Renouf via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu May 17 12:48:00 PDT 2018
tpr created this revision.
Herald added subscribers: llvm-commits, t-tye, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl, arsenm.
For a block with WQM on entry and exit and containing no exact mode
code, but containing some WWM code, the WQM pass forgot to process the
block at all and so did not insert code to enter and leave WWM.
This commit fixes that.
Change-Id: I044792eead1293bed4203fb26ce75f47878afeb6
Repository:
rL LLVM
https://reviews.llvm.org/D47027
Files:
lib/Target/AMDGPU/SIWholeQuadMode.cpp
test/CodeGen/AMDGPU/wqm.ll
Index: test/CodeGen/AMDGPU/wqm.ll
===================================================================
--- test/CodeGen/AMDGPU/wqm.ll
+++ test/CodeGen/AMDGPU/wqm.ll
@@ -781,6 +781,36 @@
ret <4 x float> %r
}
+; Check a case of a block being entirely WQM except for a bit of WWM.
+; There was a bug where it forgot to enter and leave WWM.
+;
+;CHECK-LABEL: {{^}}test_wwm_within_wqm:
+;CHECK: %IF
+;CHECK: s_or_saveexec_b64 {{.*}}, -1
+;CHECK: ds_swizzle
+;
+define amdgpu_ps float @test_wwm_within_wqm(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data) {
+main_body:
+ %c.bc = bitcast i32 %c to float
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %cmp = icmp eq i32 %z, 0
+ br i1 %cmp, label %IF, label %ENDIF
+
+IF:
+ %dataf = extractelement <4 x float> %dtex, i32 0
+ %data1 = fptosi float %dataf to i32
+ %data2 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %data1, i32 0)
+ %data3 = call i32 @llvm.amdgcn.ds.swizzle(i32 %data2, i32 2079)
+ %data4 = call i32 @llvm.amdgcn.wwm.i32(i32 %data3)
+ %data4f = sitofp i32 %data4 to float
+ br label %ENDIF
+
+ENDIF:
+ %r = phi float [ %data4f, %IF ], [ 0.0, %main_body ]
+ ret float %r
+}
+
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #2
@@ -802,6 +832,7 @@
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
+declare i32 @llvm.amdgcn.ds.swizzle(i32, i32)
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }
Index: lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================
--- lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -452,6 +452,11 @@
if (II.Needs != 0)
markInstructionUses(MI, II.Needs, Worklist);
+
+ // Ensure we process a block containing WWM, even if it does not require any
+ // WQM transitions.
+ if (II.Needs & StateWWM)
+ BI.Needs |= StateWWM;
}
void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D47027.147375.patch
Type: text/x-patch
Size: 2663 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180517/756e4585/attachment.bin>
More information about the llvm-commits
mailing list