[PATCH] D101966: [AMDGPU] Fix WQM failure with single block inactive demote
Carl Ritson via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu May 6 05:03:08 PDT 2021
This revision was automatically updated to reflect the committed changes.
Closed by commit rG67cfefebbbbb: [AMDGPU] Fix WQM failure with single block inactive demote (authored by critson).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D101966/new/
https://reviews.llvm.org/D101966
Files:
llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
@@ -1184,6 +1184,44 @@
ret void
}
+define amdgpu_ps void @static_exact_nop(float %arg0, float %arg1) {
+; SI-LABEL: static_exact_nop:
+; SI: ; %bb.0: ; %.entry
+; SI-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
+; SI-NEXT: exp mrt1 v0, v0, v0, v0 done vm
+; SI-NEXT: s_endpgm
+;
+; GFX9-LABEL: static_exact_nop:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
+; GFX9-NEXT: exp mrt1 v0, v0, v0, v0 done vm
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-32-LABEL: static_exact_nop:
+; GFX10-32: ; %bb.0: ; %.entry
+; GFX10-32-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v0
+; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
+; GFX10-32-NEXT: exp mrt1 v0, v0, v0, v0 done vm
+; GFX10-32-NEXT: s_endpgm
+;
+; GFX10-64-LABEL: static_exact_nop:
+; GFX10-64: ; %bb.0: ; %.entry
+; GFX10-64-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
+; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
+; GFX10-64-NEXT: exp mrt1 v0, v0, v0, v0 done vm
+; GFX10-64-NEXT: s_endpgm
+.entry:
+ %c0 = fcmp olt float %arg0, 0.000000e+00
+ %c1 = fcmp oge float %arg1, 0.0
+ call void @llvm.amdgcn.wqm.demote(i1 true)
+ %tmp1 = select i1 %c0, float 1.000000e+00, float 0.000000e+00
+ call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
+ ret void
+}
+
+
declare void @llvm.amdgcn.wqm.demote(i1) #0
declare i1 @llvm.amdgcn.live.mask() #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
Index: llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -923,7 +923,7 @@
} else {
// Static: kill does nothing
MachineInstr *NewTerm = nullptr;
- if (IsDemote) {
+ if (MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
LIS->RemoveMachineInstrFromMaps(MI);
} else {
assert(MBB.succ_size() == 1);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D101966.343362.patch
Type: text/x-patch
Size: 2356 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210506/66cd9361/attachment.bin>
More information about the llvm-commits mailing list