[PATCH] D124981: [AMDGPU] Enable WQM if demotes and softwqm are combined

Tue May 10 04:36:47 PDT 2022

critson updated this revision to Diff 428338.
critson added a comment.

- Rebase onto the pre-committed test


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124981/new/

https://reviews.llvm.org/D124981

Files:
  llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll


Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
@@ -287,6 +287,7 @@
 ; CHECK-LABEL: test_demote_1:
 ; CHECK:       ; %bb.0: ; %main_body
 ; CHECK-NEXT:    s_mov_b64 s[2:3], exec
+; CHECK-NEXT:    s_wqm_b64 exec, exec
 ; CHECK-NEXT:    v_mov_b32_e32 v0, s0
 ; CHECK-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 idxen
 ; CHECK-NEXT:    v_mov_b32_e32 v1, s1
@@ -297,10 +298,12 @@
 ; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[0:1]
 ; CHECK-NEXT:    s_cbranch_scc0 .LBB8_2
 ; CHECK-NEXT:  ; %bb.1: ; %main_body
-; CHECK-NEXT:    s_and_b64 exec, exec, s[2:3]
+; CHECK-NEXT:    s_wqm_b64 s[0:1], s[2:3]
+; CHECK-NEXT:    s_and_b64 exec, exec, s[0:1]
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    v_add_f32_e32 v0, v0, v1
 ; CHECK-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec
+; CHECK-NEXT:    s_and_b64 exec, exec, s[2:3]
 ; CHECK-NEXT:    s_branch .LBB8_3
 ; CHECK-NEXT:  .LBB8_2:
 ; CHECK-NEXT:    s_mov_b64 exec, 0
Index: llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -489,6 +489,7 @@
   SmallVector<MachineInstr *, 4> SoftWQMInstrs;
   bool HasImplicitDerivatives =
       MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS;
+  bool HasDemotes = false;
 
   // We need to visit the basic blocks in reverse post-order so that we visit
   // defs before uses, in particular so that we don't accidentally mark an
@@ -573,6 +574,8 @@
                    Opcode == AMDGPU::SI_DEMOTE_I1) {
           KillInstrs.push_back(&MI);
           BBI.NeedsLowering = true;
+          if (Opcode == AMDGPU::SI_DEMOTE_I1)
+            HasDemotes = true;
         } else if (WQMOutputs) {
           // The function is in machine SSA form, which means that physical
           // VGPRs correspond to shader inputs and outputs. Inputs are
@@ -601,6 +604,12 @@
     }
   }
 
+  // Demotes may be used to intentionally introduce new helper lanes.
+  // Enable WQM to facilitate this effect if there are operations which
+  // would change behaviour when run in WQM, i.e. SOFT_WQM instructions.
+  if (HasDemotes && !SoftWQMInstrs.empty())
+    GlobalFlags |= StateWQM;
+
   // Mark sure that any SET_INACTIVE instructions are computed in WQM if WQM is
   // ever used anywhere in the function. This implements the corresponding
   // semantics of @llvm.amdgcn.set.inactive.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D124981.428338.patch
Type: text/x-patch
Size: 2636 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220510/7dbbec73/attachment.bin>