[llvm] 8e64d84 - [MachineSink] Check block prologue interference

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 21 19:16:51 PDT 2022


Author: Carl Ritson
Date: 2022-03-22T11:15:37+09:00
New Revision: 8e64d84995ddb317f8c79825069a3d2b0005551a

URL: https://github.com/llvm/llvm-project/commit/8e64d84995ddb317f8c79825069a3d2b0005551a
DIFF: https://github.com/llvm/llvm-project/commit/8e64d84995ddb317f8c79825069a3d2b0005551a.diff

LOG: [MachineSink] Check block prologue interference

Sinking must check for interference between the block prologue
and the instruction being sunk.
Specifically, check whether the prologue clobbers registers used by the
sunk instruction, and whether the sunk instruction overwrites registers
defined by the prologue.

Reviewed By: rampitec, ruiling

Differential Revision: https://reviews.llvm.org/D121277

Added: 
    

Modified: 
    llvm/lib/CodeGen/MachineSink.cpp
    llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 995caae1599f4..966b012725d81 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -1294,6 +1294,45 @@ bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
   return true;
 }
 
+/// Return true if a target defined block prologue instruction of \p BB that
+/// precedes \p End interferes with sink candidate \p MI. \p MRI may be null.
+static bool blockPrologueInterferes(MachineBasicBlock *BB,
+                                    MachineBasicBlock::iterator End,
+                                    MachineInstr &MI,
+                                    const TargetRegisterInfo *TRI,
+                                    const TargetInstrInfo *TII,
+                                    const MachineRegisterInfo *MRI) {
+  if (BB->begin() == End)
+    return false; // nothing precedes End, so there is no prologue
+  for (MachineBasicBlock::iterator PI = BB->getFirstNonPHI(); PI != End; ++PI) {
+    // Skip instructions the target does not treat as block prologue
+    if (!TII->isBasicBlockPrologue(*PI))
+      continue;
+    for (auto &MO : MI.operands()) {
+      if (!MO.isReg())
+        continue;
+      Register Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      if (MO.isUse()) {
+        if (Register::isPhysicalRegister(Reg) &&
+            (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
+          continue; // ignorable or constant physreg use is not checked
+        if (PI->modifiesRegister(Reg, TRI))
+          return true; // prologue writes a register that MI reads
+      } else {
+        if (PI->readsRegister(Reg, TRI))
+          return true; // prologue reads a register that MI writes
+        // A non-dead prologue def of Reg also counts as interference
+        auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI);
+        if (DefOp && !DefOp->isDead())
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
 /// SinkInstruction - Determine whether it is safe to sink the specified machine
 /// instruction out of its current block into a successor.
 bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
@@ -1407,6 +1446,10 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
   // Determine where to insert into. Skip phi nodes.
   MachineBasicBlock::iterator InsertPos =
       SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
+  if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
+    LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
+    return false;
+  }
 
   // Collect debug users of any vreg that this inst defines.
   SmallVector<MIRegs, 4> DbgUsersToSink;
@@ -1805,11 +1848,19 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
     }
     auto DbgValsToSink = DbgValsToSinkMap.takeVector();
 
+    LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);
+
+    MachineBasicBlock::iterator InsertPos =
+        SuccBB->SkipPHIsAndLabels(SuccBB->begin());
+    if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
+      LLVM_DEBUG(
+          dbgs() << " *** Not sinking: prologue interference\n");
+      continue;
+    }
+
     // Clear the kill flag if SrcReg is killed between MI and the end of the
     // block.
     clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
-    MachineBasicBlock::iterator InsertPos =
-        SuccBB->SkipPHIsAndLabels(SuccBB->begin());
     performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
     updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
 

diff  --git a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
index fee84198afccd..16edbf1d83378 100644
--- a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
+++ b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
@@ -5,6 +5,7 @@
 # past block prologues which would overwrite their uses.
 
 ---
+# Make sure COPY to $sgpr9 is not sunk after S_AND_SAVEEXEC_B64.
 name:            _amdgpu_ps_main
 alignment:       1
 tracksRegLiveness: true
@@ -17,16 +18,16 @@ body:             |
   ; GFX10-NEXT:   successors: %bb.1(0x80000000)
   ; GFX10-NEXT:   liveins: $sgpr4
   ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   renamable $sgpr9 = COPY $sgpr4
   ; GFX10-NEXT:   renamable $vgpr5 = IMPLICIT_DEF
   ; GFX10-NEXT:   renamable $sgpr0_sgpr1 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, $vgpr5, 0, implicit $mode, implicit $exec
   ; GFX10-NEXT:   S_BRANCH %bb.1
   ; GFX10-NEXT: {{  $}}
   ; GFX10-NEXT: bb.1:
   ; GFX10-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
-  ; GFX10-NEXT:   liveins: $sgpr4:0x0000000000000003, $sgpr6, $sgpr0_sgpr1
+  ; GFX10-NEXT:   liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1
   ; GFX10-NEXT: {{  $}}
   ; GFX10-NEXT:   $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
-  ; GFX10-NEXT:   renamable $sgpr9 = COPY $sgpr4
   ; GFX10-NEXT:   renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
   ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
   ; GFX10-NEXT:   S_BRANCH %bb.2
@@ -70,3 +71,143 @@ body:             |
     S_ENDPGM 0
 
 ...
+---
+# Make sure COPY to $sgpr0_sgpr1 is not sunk after S_AND_SAVEEXEC_B64.
+name:            _amdgpu_ps_main2
+alignment:       1
+tracksRegLiveness: true
+registers:       []
+liveins:
+  - { reg: '$sgpr4', virtual-reg: '' }
+  - { reg: '$sgpr6_sgpr7', virtual-reg: '' }
+body:             |
+  ; GFX10-LABEL: name: _amdgpu_ps_main2
+  ; GFX10: bb.0:
+  ; GFX10-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr6_sgpr7
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   renamable $sgpr9 = COPY $sgpr4
+  ; GFX10-NEXT:   renamable $vgpr5 = IMPLICIT_DEF
+  ; GFX10-NEXT:   renamable $sgpr0_sgpr1 = COPY $sgpr6_sgpr7
+  ; GFX10-NEXT:   S_BRANCH %bb.1
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.1:
+  ; GFX10-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX10-NEXT:   liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX10-NEXT:   renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+  ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
+  ; GFX10-NEXT:   S_BRANCH %bb.2
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.2:
+  ; GFX10-NEXT:   successors: %bb.3(0x80000000)
+  ; GFX10-NEXT:   liveins: $sgpr6
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $m0 = COPY killed renamable $sgpr6
+  ; GFX10-NEXT:   S_BRANCH %bb.3
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.3:
+  ; GFX10-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x80000000)
+    liveins: $sgpr4, $sgpr6_sgpr7
+
+    renamable $sgpr9 = COPY $sgpr4
+    renamable $vgpr5 = IMPLICIT_DEF
+    renamable $sgpr0_sgpr1 = COPY $sgpr6_sgpr7
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x40000000), %bb.8(0x40000000)
+    liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1
+
+    $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+    renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+    S_CBRANCH_EXECZ %bb.8, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.8(0x40000000)
+    liveins: $sgpr6
+
+    $m0 = COPY killed renamable $sgpr6
+    S_BRANCH %bb.8
+
+  bb.8:
+
+    S_ENDPGM 0
+
+...
+---
+# Make sure COPY to $sgpr2_sgpr3 is not sunk after S_AND_SAVEEXEC_B32.
+name:            _amdgpu_ps_main3
+alignment:       1
+tracksRegLiveness: true
+registers:       []
+liveins:
+  - { reg: '$sgpr6_sgpr7', virtual-reg: '' }
+  - { reg: '$sgpr8', virtual-reg: '' }
+body:             |
+  ; GFX10-LABEL: name: _amdgpu_ps_main3
+  ; GFX10: bb.0:
+  ; GFX10-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX10-NEXT:   liveins: $sgpr6_sgpr7, $sgpr8
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   renamable $vgpr5 = IMPLICIT_DEF
+  ; GFX10-NEXT:   renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+  ; GFX10-NEXT:   renamable $sgpr2_sgpr3 = COPY $sgpr6_sgpr7
+  ; GFX10-NEXT:   S_BRANCH %bb.1
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.1:
+  ; GFX10-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX10-NEXT:   liveins: $sgpr6, $sgpr8, $sgpr0_sgpr1, $sgpr2_sgpr3
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $sgpr2 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX10-NEXT:   $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX10-NEXT:   renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+  ; GFX10-NEXT:   S_NOP 0, implicit $sgpr2_sgpr3
+  ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
+  ; GFX10-NEXT:   S_BRANCH %bb.2
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.2:
+  ; GFX10-NEXT:   successors: %bb.3(0x80000000)
+  ; GFX10-NEXT:   liveins: $sgpr6
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $m0 = COPY killed renamable $sgpr6
+  ; GFX10-NEXT:   S_BRANCH %bb.3
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.3:
+  ; GFX10-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x80000000)
+    liveins: $sgpr6_sgpr7, $sgpr8
+
+    renamable $vgpr5 = IMPLICIT_DEF
+    renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+    renamable $sgpr2_sgpr3 = COPY $sgpr6_sgpr7
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x40000000), %bb.8(0x40000000)
+    liveins: $sgpr6, $sgpr8, $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    $sgpr2 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+    $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+    renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+    S_NOP 0, implicit $sgpr2_sgpr3
+    S_CBRANCH_EXECZ %bb.8, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.8(0x40000000)
+    liveins: $sgpr6
+
+    $m0 = COPY killed renamable $sgpr6
+    S_BRANCH %bb.8
+
+  bb.8:
+
+    S_ENDPGM 0
+
+...


        


More information about the llvm-commits mailing list