[llvm] c3b3b99 - [AMDGPU] Avoid redundant mode register writes

Tim Corringham via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 24 06:12:53 PDT 2020


Author: Tim Corringham
Date: 2020-06-24T14:11:29+01:00
New Revision: c3b3b999ec9ef7e8d9367848db82c97a0369473f

URL: https://github.com/llvm/llvm-project/commit/c3b3b999ec9ef7e8d9367848db82c97a0369473f
DIFF: https://github.com/llvm/llvm-project/commit/c3b3b999ec9ef7e8d9367848db82c97a0369473f.diff

LOG: [AMDGPU] Avoid redundant mode register writes

Summary:
The SIModeRegister pass attempts to generate the minimal number of
writes to the mode register. However it was failing to correctly
deal with some loops, resulting in some redundant setreg instructions
being inserted.

This change amends the pass to avoid generating these redundant
instructions.

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82215

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIModeRegister.cpp
    llvm/test/CodeGen/AMDGPU/mode-register.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 40b3c9e8fde5..0e162ac42c11 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -108,7 +108,11 @@ class BlockData {
   // which is used in Phase 3 if we need to insert a mode change.
   MachineInstr *FirstInsertionPoint;
 
-  BlockData() : FirstInsertionPoint(nullptr){};
+  // A flag to indicate whether an Exit value has been set (we can't tell by
+  // examining the Exit value itself as all values may be valid results).
+  bool ExitSet;
+
+  BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){};
 };
 
 namespace {
@@ -129,6 +133,8 @@ class SIModeRegister : public MachineFunctionPass {
   Status DefaultStatus =
       Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
 
+  bool Changed = false;
+
 public:
   SIModeRegister() : MachineFunctionPass(ID) {}
 
@@ -199,6 +205,7 @@ void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
                 (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
                 (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
     ++NumSetregInserted;
+    Changed = true;
     InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
   }
 }
@@ -323,22 +330,49 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
 // exit value is propagated.
 void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
                                         const SIInstrInfo *TII) {
-  //  BlockData *BI = BlockInfo[MBB.getNumber()];
+  bool RevisitRequired = false;
+  bool ExitSet = false;
   unsigned ThisBlock = MBB.getNumber();
   if (MBB.pred_empty()) {
     // There are no predecessors, so use the default starting status.
     BlockInfo[ThisBlock]->Pred = DefaultStatus;
+    ExitSet = true;
   } else {
     // Build a status that is common to all the predecessors by intersecting
     // all the predecessor exit status values.
+    // Mask bits (which represent the Mode bits with a known value) can only be
+    // added by explicit SETREG instructions or the initial default value -
+    // the intersection process may remove Mask bits.
+    // If we find a predecessor that has not yet had an exit value determined
+    // (this can happen for example if a block is its own predecessor) we defer
+    // use of that value as the Mask will be all zero, and we will revisit this
+    // block again later (unless the only predecessor without an exit value is
+    // this block).
     MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
     MachineBasicBlock &PB = *(*P);
-    BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit;
+    unsigned PredBlock = PB.getNumber();
+    if ((ThisBlock == PredBlock) && (std::next(P) == E)) {
+      BlockInfo[ThisBlock]->Pred = DefaultStatus;
+      ExitSet = true;
+    } else if (BlockInfo[PredBlock]->ExitSet) {
+      BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
+      ExitSet = true;
+    } else if (PredBlock != ThisBlock)
+      RevisitRequired = true;
 
     for (P = std::next(P); P != E; P = std::next(P)) {
       MachineBasicBlock *Pred = *P;
-      BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(
-          BlockInfo[Pred->getNumber()]->Exit);
+      unsigned PredBlock = Pred->getNumber();
+      if (BlockInfo[PredBlock]->ExitSet) {
+        if (BlockInfo[ThisBlock]->ExitSet) {
+          BlockInfo[ThisBlock]->Pred =
+              BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
+        } else {
+          BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
+        }
+        ExitSet = true;
+      } else if (PredBlock != ThisBlock)
+        RevisitRequired = true;
     }
   }
   Status TmpStatus =
@@ -354,6 +388,9 @@ void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
       Phase2List.push(&B);
     }
   }
+  BlockInfo[ThisBlock]->ExitSet = ExitSet;
+  if (RevisitRequired)
+    Phase2List.push(&MBB);
 }
 
 // In Phase 3 we revisit each block and if it has an insertion point defined we
@@ -361,7 +398,6 @@ void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
 // not we insert an appropriate setreg instruction to modify the Mode register.
 void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
                                         const SIInstrInfo *TII) {
-  //  BlockData *BI = BlockInfo[MBB.getNumber()];
   unsigned ThisBlock = MBB.getNumber();
   if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
     Status Delta =
@@ -402,5 +438,5 @@ bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
 
   BlockInfo.clear();
 
-  return NumSetregInserted > 0;
+  return Changed;
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/mode-register.mir b/llvm/test/CodeGen/AMDGPU/mode-register.mir
index 753e6a3ce0a7..382050bc3888 100644
--- a/llvm/test/CodeGen/AMDGPU/mode-register.mir
+++ b/llvm/test/CodeGen/AMDGPU/mode-register.mir
@@ -457,3 +457,55 @@ body: |
   bb.4:
     S_ENDPGM 0
 ...
+---
+# checks for bug where if a block is its own predecessor it could cause mode tracking
+# to produce the wrong mode, resulting in an unnecessary setreg
+# CHECK-LABEL: name: single_block_loop
+# CHECK-LABEL: bb.0:
+# CHECK-NOT: S_SETREG
+
+name: single_block_loop
+
+body: |
+  bb.0:
+    successors: %bb.1
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_VCCZ %bb.1, implicit $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3
+    liveins: $vgpr1_vgpr2
+    $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_ENDPGM 0
+...
+---
+# checks for a bug where if the first block is its own predecessor the initial mode was
+# not correctly propagated, resulting in an unnecessary setreg
+# CHECK-LABEL: name: first_block_loop
+# CHECK-LABEL: bb.0:
+# CHECK-NOT: S_SETREG
+
+name: first_block_loop
+
+body: |
+  bb.0:
+    successors: %bb.0, %bb.1
+    S_CBRANCH_VCCZ %bb.0, implicit $vcc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2
+    liveins: $vgpr1_vgpr2
+    $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...


        


More information about the llvm-commits mailing list