[PATCH] D95500: [AMDGPU] Fix WWM Entry SCC preservation
Carl Ritson via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 27 20:29:58 PST 2021
critson updated this revision to Diff 319747.
critson marked an inline comment as done.
critson added a comment.
Address comments.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D95500/new/
https://reviews.llvm.org/D95500
Files:
llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
llvm/test/CodeGen/AMDGPU/wqm.mir
Index: llvm/test/CodeGen/AMDGPU/wqm.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/wqm.mir
+++ llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -49,6 +49,40 @@
...
+---
+# Second test for awareness that s_or_saveexec_b64 clobbers SCC,
+# because the entry block is treated differently.
+#
+#CHECK: %bb.1
+#CHECK: S_CMP_LT_I32
+#CHECK: COPY $scc
+#CHECK: ENTER_WWM
+#CHECK: $scc = COPY
+#CHECK: S_CSELECT_B32
+name: test_wwm_scc2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
+
+ %3:vgpr_32 = COPY $vgpr0
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = IMPLICIT_DEF
+
+ bb.1:
+ S_CMP_LT_I32 0, %0:sgpr_32, implicit-def $scc
+ %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %3:vgpr_32, %13:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ %12:vgpr_32 = V_ADD_CO_U32_e32 %3:vgpr_32, %3:vgpr_32, implicit-def $vcc, implicit $exec
+ %5:sgpr_32 = S_CSELECT_B32 %2:sgpr_32, %1:sgpr_32, implicit $scc
+ %11:vgpr_32 = V_ADD_CO_U32_e32 %5:sgpr_32, %12:vgpr_32, implicit-def $vcc, implicit $exec
+ $vgpr0 = WWM %11:vgpr_32, implicit $exec
+ $vgpr1 = COPY %10:vgpr_32
+ SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
+
+...
+
---
# V_SET_INACTIVE, when its second operand is undef, is replaced by a
# COPY by si-wqm. Ensure the instruction is removed.
Index: llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -840,9 +840,26 @@
First = FirstWQM;
}
+ // Whether we need to save SCC depends on start and end states
+ bool SaveSCC = false;
+ switch (State) {
+ case StateExact:
+ case StateWWM:
+ // Exact/WWM -> WWM: save SCC
+ // Exact/WWM -> WQM: save SCC if WQM mask is generated from exec
+ // Exact/WWM -> Exact: no save
+ SaveSCC = (Needs & StateWWM) || ((Needs & StateWQM) && WQMFromExec);
+ break;
+ case StateWQM:
+ // WQM -> Exact/WWM: save SCC
+ SaveSCC = !(Needs & StateWQM);
+ break;
+ default:
+ llvm_unreachable("Unknown state");
+ break;
+ }
MachineBasicBlock::iterator Before =
- prepareInsertion(MBB, First, II, Needs == StateWQM,
- Needs == StateExact || WQMFromExec);
+ prepareInsertion(MBB, First, II, Needs == StateWQM, SaveSCC);
if (State == StateWWM) {
assert(SavedNonWWMReg);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D95500.319747.patch
Type: text/x-patch
Size: 2618 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210128/e50308a4/attachment.bin>
More information about the llvm-commits
mailing list