[PATCH] D47027: [AMDGPU] Fixed WWM bug in block otherwise entirely in WQM
Tim Renouf via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sun May 27 10:32:12 PDT 2018
This revision was automatically updated to reflect the committed changes.
Closed by commit rL333362: [AMDGPU] Fixed WWM bug in block otherwise entirely in WQM (authored by tpr).
Changed prior to commit:
https://reviews.llvm.org/D47027?vs=147375&id=148758#toc
Repository:
rL LLVM
https://reviews.llvm.org/D47027
Files:
llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
Index: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
@@ -746,6 +746,36 @@
ret <4 x float> %r
}
+; Check a case of a block being entirely WQM except for a bit of WWM.
+; There was a bug where it forgot to enter and leave WWM.
+;
+;CHECK-LABEL: {{^}}test_wwm_within_wqm:
+;CHECK: %IF
+;CHECK: s_or_saveexec_b64 {{.*}}, -1
+;CHECK: ds_swizzle
+;
+define amdgpu_ps float @test_wwm_within_wqm(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data) {
+main_body:
+ %c.bc = bitcast i32 %c to float
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %cmp = icmp eq i32 %z, 0
+ br i1 %cmp, label %IF, label %ENDIF
+
+IF:
+ %dataf = extractelement <4 x float> %dtex, i32 0
+ %data1 = fptosi float %dataf to i32
+ %data2 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %data1, i32 0)
+ %data3 = call i32 @llvm.amdgcn.ds.swizzle(i32 %data2, i32 2079)
+ %data4 = call i32 @llvm.amdgcn.wwm.i32(i32 %data3)
+ %data4f = sitofp i32 %data4 to float
+ br label %ENDIF
+
+ENDIF:
+ %r = phi float [ %data4f, %IF ], [ 0.0, %main_body ]
+ ret float %r
+}
+
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #2
@@ -767,6 +797,7 @@
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
+declare i32 @llvm.amdgcn.ds.swizzle(i32, i32)
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }
Index: llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -453,6 +453,11 @@
if (II.Needs != 0)
markInstructionUses(MI, II.Needs, Worklist);
+
+ // Ensure we process a block containing WWM, even if it does not require any
+ // WQM transitions.
+ if (II.Needs & StateWWM)
+ BI.Needs |= StateWWM;
}
void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D47027.148758.patch
Type: text/x-patch
Size: 2729 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180527/ca979a48/attachment.bin>
More information about the llvm-commits mailing list