[PATCH] D95500: [AMDGPU] Fix WWM Entry SCC preservation

Carl Ritson via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 27 20:29:58 PST 2021


critson updated this revision to Diff 319747.
critson marked an inline comment as done.
critson added a comment.

Address comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95500/new/

https://reviews.llvm.org/D95500

Files:
  llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
  llvm/test/CodeGen/AMDGPU/wqm.mir


Index: llvm/test/CodeGen/AMDGPU/wqm.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/wqm.mir
+++ llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -49,6 +49,40 @@
 
 ...
 
+---
+# Second test for awareness that s_or_saveexec_b64 clobbers SCC,
+# needed because the entry block is treated differently.
+#
+#CHECK: %bb.1
+#CHECK: S_CMP_LT_I32
+#CHECK: COPY $scc
+#CHECK: ENTER_WWM
+#CHECK: $scc = COPY
+#CHECK: S_CSELECT_B32
+name:            test_wwm_scc2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
+
+    %3:vgpr_32 = COPY $vgpr0
+    %2:sgpr_32 = COPY $sgpr2
+    %1:sgpr_32 = COPY $sgpr1
+    %0:sgpr_32 = COPY $sgpr0
+    %13:sgpr_128 = IMPLICIT_DEF
+
+  bb.1:
+    S_CMP_LT_I32 0, %0:sgpr_32, implicit-def $scc
+    %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %3:vgpr_32, %13:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    %12:vgpr_32 = V_ADD_CO_U32_e32 %3:vgpr_32, %3:vgpr_32, implicit-def $vcc, implicit $exec
+    %5:sgpr_32 = S_CSELECT_B32 %2:sgpr_32, %1:sgpr_32, implicit $scc
+    %11:vgpr_32 = V_ADD_CO_U32_e32 %5:sgpr_32, %12:vgpr_32, implicit-def $vcc, implicit $exec
+    $vgpr0 = WWM %11:vgpr_32, implicit $exec
+    $vgpr1 = COPY %10:vgpr_32
+    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
+
+...
+
 ---
 # V_SET_INACTIVE, when its second operand is undef, is replaced by a
 # COPY by si-wqm. Ensure the instruction is removed.
Index: llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -840,9 +840,26 @@
         First = FirstWQM;
       }
 
+      // Whether we need to save SCC depends on start and end states
+      bool SaveSCC = false;
+      switch (State) {
+      case StateExact:
+      case StateWWM:
+        // Exact/WWM -> WWM: save SCC
+        // Exact/WWM -> WQM: save SCC if WQM mask is generated from exec
+        // Exact/WWM -> Exact: no save
+        SaveSCC = (Needs & StateWWM) || ((Needs & StateWQM) && WQMFromExec);
+        break;
+      case StateWQM:
+        // WQM -> Exact/WWM: save SCC
+        SaveSCC = !(Needs & StateWQM);
+        break;
+      default:
+        llvm_unreachable("Unknown state");
+        break;
+      }
       MachineBasicBlock::iterator Before =
-          prepareInsertion(MBB, First, II, Needs == StateWQM,
-                           Needs == StateExact || WQMFromExec);
+          prepareInsertion(MBB, First, II, Needs == StateWQM, SaveSCC);
 
       if (State == StateWWM) {
         assert(SavedNonWWMReg);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D95500.319747.patch
Type: text/x-patch
Size: 2618 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210128/e50308a4/attachment.bin>


More information about the llvm-commits mailing list