[llvm] 8d28a41 - [AMDGPU] Remove SIWholeQuadMode pass early exit (#98450)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 17 03:38:27 PDT 2024


Author: Carl Ritson
Date: 2024-07-17T19:38:23+09:00
New Revision: 8d28a4102b3668c75d061235c2890546757f4257

URL: https://github.com/llvm/llvm-project/commit/8d28a4102b3668c75d061235c2890546757f4257
DIFF: https://github.com/llvm/llvm-project/commit/8d28a4102b3668c75d061235c2890546757f4257.diff

LOG: [AMDGPU] Remove SIWholeQuadMode pass early exit (#98450)

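Fold the bypass logic from the pass's early-exit path into the main
execution flow, and have the pass report whether it actually changed the
function instead of unconditionally returning true.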

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 3dc8cc17afc16..df7906ebd8a7e 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -219,11 +219,12 @@ class SIWholeQuadMode : public MachineFunctionPass {
   void lowerBlock(MachineBasicBlock &MBB);
   void processBlock(MachineBasicBlock &MBB, bool IsEntry);
 
-  void lowerLiveMaskQueries();
-  void lowerCopyInstrs();
-  void lowerKillInstrs(bool IsWQM);
+  bool lowerLiveMaskQueries();
+  bool lowerCopyInstrs();
+  bool lowerKillInstrs(bool IsWQM);
   void lowerInitExec(MachineInstr &MI);
-  MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry);
+  MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry,
+                                                  bool &Changed);
 
 public:
   static char ID;
@@ -796,6 +797,8 @@ MachineBasicBlock *SIWholeQuadMode::splitBlock(MachineBasicBlock *BB,
 
 MachineInstr *SIWholeQuadMode::lowerKillF32(MachineBasicBlock &MBB,
                                             MachineInstr &MI) {
+  assert(LiveMaskReg.isVirtual());
+
   const DebugLoc &DL = MI.getDebugLoc();
   unsigned Opcode = 0;
 
@@ -913,6 +916,8 @@ MachineInstr *SIWholeQuadMode::lowerKillF32(MachineBasicBlock &MBB,
 
 MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB,
                                            MachineInstr &MI, bool IsWQM) {
+  assert(LiveMaskReg.isVirtual());
+
   const DebugLoc &DL = MI.getDebugLoc();
   MachineInstr *MaskUpdateMI = nullptr;
 
@@ -1144,6 +1149,8 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
 void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator Before,
                               Register SaveWQM) {
+  assert(LiveMaskReg.isVirtual());
+
   bool IsTerminator = Before == MBB.end();
   if (!IsTerminator) {
     auto FirstTerm = MBB.getFirstTerminator();
@@ -1423,7 +1430,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, bool IsEntry) {
   assert(!SavedNonStrictReg);
 }
 
-void SIWholeQuadMode::lowerLiveMaskQueries() {
+bool SIWholeQuadMode::lowerLiveMaskQueries() {
   for (MachineInstr *MI : LiveMaskQueries) {
     const DebugLoc &DL = MI->getDebugLoc();
     Register Dest = MI->getOperand(0).getReg();
@@ -1435,9 +1442,10 @@ void SIWholeQuadMode::lowerLiveMaskQueries() {
     LIS->ReplaceMachineInstrInMaps(*MI, *Copy);
     MI->eraseFromParent();
   }
+  return !LiveMaskQueries.empty();
 }
 
-void SIWholeQuadMode::lowerCopyInstrs() {
+bool SIWholeQuadMode::lowerCopyInstrs() {
   for (MachineInstr *MI : LowerToMovInstrs) {
     assert(MI->getNumExplicitOperands() == 2);
 
@@ -1492,9 +1500,10 @@ void SIWholeQuadMode::lowerCopyInstrs() {
                                 *MRI, MI->getOperand(0)));
     MI->setDesc(TII->get(CopyOp));
   }
+  return !LowerToCopyInstrs.empty() || !LowerToMovInstrs.empty();
 }
 
-void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
+bool SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
   for (MachineInstr *MI : KillInstrs) {
     MachineBasicBlock *MBB = MI->getParent();
     MachineInstr *SplitPoint = nullptr;
@@ -1510,6 +1519,7 @@ void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
     if (SplitPoint)
       splitBlock(MBB, SplitPoint);
   }
+  return !KillInstrs.empty();
 }
 
 void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
@@ -1601,7 +1611,7 @@ void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
 /// Lower INIT_EXEC instructions. Return a suitable insert point in \p Entry
 /// for instructions that depend on EXEC.
 MachineBasicBlock::iterator
-SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry) {
+SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {
   MachineBasicBlock::iterator InsertPt = Entry.getFirstNonPHI();
 
   for (MachineInstr *MI : InitExecInstrs) {
@@ -1612,6 +1622,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry) {
       InsertPt = std::next(MI->getIterator());
 
     lowerInitExec(*MI);
+    Changed = true;
   }
 
   return InsertPt;
@@ -1664,48 +1675,50 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   }
 
   const char GlobalFlags = analyzeFunction(MF);
-  const bool NeedsLiveMask = !(KillInstrs.empty() && LiveMaskQueries.empty());
+  bool Changed = false;
 
   LiveMaskReg = Exec;
 
   MachineBasicBlock &Entry = MF.front();
-  MachineBasicBlock::iterator EntryMI = lowerInitExecInstrs(Entry);
-
-  // Shader is simple does not need any state changes or any complex lowering
-  if (!(GlobalFlags & (StateWQM | StateStrict)) && LowerToCopyInstrs.empty() &&
-      LowerToMovInstrs.empty() && KillInstrs.empty()) {
-    lowerLiveMaskQueries();
-    if (!InitExecInstrs.empty())
-      LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
-    return !InitExecInstrs.empty() || !LiveMaskQueries.empty();
-  }
+  MachineBasicBlock::iterator EntryMI = lowerInitExecInstrs(Entry, Changed);
 
   // Store a copy of the original live mask when required
-  if (NeedsLiveMask || (GlobalFlags & StateWQM)) {
+  const bool HasLiveMaskQueries = !LiveMaskQueries.empty();
+  const bool HasWaveModes = GlobalFlags & ~StateExact;
+  const bool HasKills = !KillInstrs.empty();
+  const bool UsesWQM = GlobalFlags & StateWQM;
+  if (HasKills || UsesWQM || (HasWaveModes && HasLiveMaskQueries)) {
     LiveMaskReg = MRI->createVirtualRegister(TRI->getBoolRC());
     MachineInstr *MI =
         BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg)
             .addReg(Exec);
     LIS->InsertMachineInstrInMaps(*MI);
+    Changed = true;
   }
 
   LLVM_DEBUG(printInfo());
 
-  lowerLiveMaskQueries();
-  lowerCopyInstrs();
+  Changed |= lowerLiveMaskQueries();
+  Changed |= lowerCopyInstrs();
 
-  // Shader only needs WQM
-  if (GlobalFlags == StateWQM) {
+  if (!HasWaveModes) {
+    // No wave mode execution
+    Changed |= lowerKillInstrs(false);
+  } else if (GlobalFlags == StateWQM) {
+    // Shader only needs WQM
     auto MI = BuildMI(Entry, EntryMI, DebugLoc(), TII->get(WQMOpc), Exec)
                   .addReg(Exec);
     LIS->InsertMachineInstrInMaps(*MI);
     lowerKillInstrs(true);
+    Changed = true;
   } else {
+    // Wave mode switching requires full lowering pass.
     for (auto BII : Blocks)
       processBlock(*BII.first, BII.first == &Entry);
     // Lowering blocks causes block splitting so perform as a second pass.
     for (auto BII : Blocks)
       lowerBlock(*BII.first);
+    Changed = true;
   }
 
   // Compute live range for live mask
@@ -1721,5 +1734,5 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   if (!KillInstrs.empty() || !InitExecInstrs.empty())
     LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
 
-  return true;
+  return Changed;
 }


        


More information about the llvm-commits mailing list