[llvm] 0824694 - [AMDGPU] Fix WWM Entry SCC preservation

Carl Ritson via llvm-commits <llvm-commits at lists.llvm.org>
Thu Jan 28 17:05:57 PST 2021


Author: Carl Ritson
Date: 2021-01-29T10:05:36+09:00
New Revision: 0824694d68d3b4aa2d6c726e395516dd1387a718

URL: https://github.com/llvm/llvm-project/commit/0824694d68d3b4aa2d6c726e395516dd1387a718
DIFF: https://github.com/llvm/llvm-project/commit/0824694d68d3b4aa2d6c726e395516dd1387a718.diff

LOG: [AMDGPU] Fix WWM Entry SCC preservation

SCC was not correctly preserved when entering WWM.
The current lit test was unable to detect this because the entry block
is handled differently.
Additionally, fix an issue where SCC was unnecessarily preserved
when exiting from WWM to Exact mode.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D95500
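
For context on the two issues above: before this change, the save-SCC argument
passed to prepareInsertion was `Needs == StateExact || WQMFromExec` (see the
hunk below), which comes out false when entering WWM from Exact (where
s_or_saveexec_b64 clobbers SCC) and true when leaving WWM for Exact (which
only restores the saved exec mask). A minimal standalone sketch of that old
predicate on those two transitions; the enum values and the printf scaffolding
are invented for illustration and are not the pass's actual definitions:

  #include <cstdio>

  // Illustrative stand-ins for the pass's state flags; the values are made up.
  enum : unsigned { StateExact = 1u << 0, StateWQM = 1u << 1, StateWWM = 1u << 2 };

  int main() {
    const bool WQMFromExec = false; // a non-entry block, as in the new test

    // Exact -> WWM: entering WWM emits s_or_saveexec_b64, which writes SCC,
    // so a live SCC must be saved -- but the old flag comes out false.
    unsigned Needs = StateWWM;
    std::printf("Exact->WWM: old SaveSCC = %d, want 1\n",
                int(Needs == StateExact || WQMFromExec));

    // WWM -> Exact: leaving WWM only restores the saved exec mask and leaves
    // SCC alone, yet the old flag comes out true and forces a useless save.
    Needs = StateExact;
    std::printf("WWM->Exact: old SaveSCC = %d, want 0\n",
                int(Needs == StateExact || WQMFromExec));
    return 0;
  }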

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
    llvm/test/CodeGen/AMDGPU/wqm.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 0640e24b37ec..c6db339a67a8 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -840,9 +840,26 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
         First = FirstWQM;
       }
 
+      // Whether we need to save SCC depends on start and end states
+      bool SaveSCC = false;
+      switch (State) {
+      case StateExact:
+      case StateWWM:
+        // Exact/WWM -> WWM: save SCC
+        // Exact/WWM -> WQM: save SCC if WQM mask is generated from exec
+        // Exact/WWM -> Exact: no save
+        SaveSCC = (Needs & StateWWM) || ((Needs & StateWQM) && WQMFromExec);
+        break;
+      case StateWQM:
+        // WQM -> Exact/WWM: save SCC
+        SaveSCC = !(Needs & StateWQM);
+        break;
+      default:
+        llvm_unreachable("Unknown state");
+        break;
+      }
       MachineBasicBlock::iterator Before =
-          prepareInsertion(MBB, First, II, Needs == StateWQM,
-                           Needs == StateExact || WQMFromExec);
+          prepareInsertion(MBB, First, II, Needs == StateWQM, SaveSCC);
 
       if (State == StateWWM) {
         assert(SavedNonWWMReg);

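Read as a standalone predicate, the new decision table looks roughly like the
sketch below. This is only a sketch: the helper name, the flag values and the
main() checks are invented here, and the remarks about which instructions
write SCC (s_or_saveexec_b64 on WWM entry, s_wqm when the WQM mask is
generated from exec, the s_and/s_andn2 used to narrow exec back to Exact)
reflect my reading of the pass rather than something stated in the patch:

  #include <cassert>

  // Sketch only: names and flag values are invented for illustration; the
  // switch body mirrors the logic added to processBlock() above.
  enum : unsigned { StateExact = 1u << 0, StateWQM = 1u << 1, StateWWM = 1u << 2 };

  static bool saveSCCForTransition(unsigned State, unsigned Needs,
                                   bool WQMFromExec) {
    switch (State) {
    case StateExact:
    case StateWWM:
      // Exact/WWM -> WWM: save SCC (the saveexec entering WWM writes it).
      // Exact/WWM -> WQM: save SCC only if the WQM mask is generated from
      //                   exec; restoring it from a saved register leaves
      //                   SCC alone.
      // Exact/WWM -> Exact: no save.
      return (Needs & StateWWM) || ((Needs & StateWQM) && WQMFromExec);
    case StateWQM:
      // WQM -> Exact/WWM: save SCC (exec is re-narrowed with an s_and).
      return !(Needs & StateWQM);
    default:
      assert(false && "unknown state");
      return true;
    }
  }

  int main() {
    assert(saveSCCForTransition(StateExact, StateWWM, false));  // enter WWM
    assert(!saveSCCForTransition(StateWWM, StateExact, false)); // leave WWM
    assert(saveSCCForTransition(StateWQM, StateExact, false));  // WQM -> Exact
    assert(saveSCCForTransition(StateExact, StateWQM, true));   // WQM from exec
    assert(!saveSCCForTransition(StateExact, StateWQM, false)); // WQM from reg
    return 0;
  }
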
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index 21fa8d87112d..bed8092a3dd0 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -49,6 +49,40 @@ body:             |
 
 ...
 
+---
+# Second test for awareness that s_or_saveexec_b64 clobbers SCC
+# Because entry block is treated differently.
+#
+#CHECK: %bb.1
+#CHECK: S_CMP_LT_I32
+#CHECK: COPY $scc
+#CHECK: ENTER_WWM
+#CHECK: $scc = COPY
+#CHECK: S_CSELECT_B32
+name:            test_wwm_scc2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
+
+    %3:vgpr_32 = COPY $vgpr0
+    %2:sgpr_32 = COPY $sgpr2
+    %1:sgpr_32 = COPY $sgpr1
+    %0:sgpr_32 = COPY $sgpr0
+    %13:sgpr_128 = IMPLICIT_DEF
+
+  bb.1:
+    S_CMP_LT_I32 0, %0:sgpr_32, implicit-def $scc
+    %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %3:vgpr_32, %13:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    %12:vgpr_32 = V_ADD_CO_U32_e32 %3:vgpr_32, %3:vgpr_32, implicit-def $vcc, implicit $exec
+    %5:sgpr_32 = S_CSELECT_B32 %2:sgpr_32, %1:sgpr_32, implicit $scc
+    %11:vgpr_32 = V_ADD_CO_U32_e32 %5:sgpr_32, %12:vgpr_32, implicit-def $vcc, implicit $exec
+    $vgpr0 = WWM %11:vgpr_32, implicit $exec
+    $vgpr1 = COPY %10:vgpr_32
+    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
+
+...
+
 ---
 # V_SET_INACTIVE, when its second operand is undef, is replaced by a
 # COPY by si-wqm. Ensure the instruction is removed.
