[llvm-branch-commits] [llvm] e2303a4 - [FastRA] Fix handling of bundled MIs
Pushpinder Singh via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Dec 20 23:15:40 PST 2020
Author: Pushpinder Singh
Date: 2020-12-21T02:10:55-05:00
New Revision: e2303a448e2fcc1d96d66e9ee9f0cfc009b69a3f
URL: https://github.com/llvm/llvm-project/commit/e2303a448e2fcc1d96d66e9ee9f0cfc009b69a3f
DIFF: https://github.com/llvm/llvm-project/commit/e2303a448e2fcc1d96d66e9ee9f0cfc009b69a3f.diff
LOG: [FastRA] Fix handling of bundled MIs
The fast register allocator skips bundled MIs because its main assignment
loop uses MachineBasicBlock::iterator (= MachineInstrBundleIterator), which
only visits the BUNDLE header. This was causing SIInsertWaitcnts, which
expects all instructions to have registers assigned, to crash.
This patch makes sure every instruction inside a bundle receives the same
register assignments that were made on its BUNDLE header.
Reviewed By: qcolombet
Differential Revision: https://reviews.llvm.org/D90369
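For context, MachineBasicBlock's default iterator is a
MachineInstrBundleIterator, so a range-for over the block only visits
top-level MIs (i.e. BUNDLE headers), while instr_iterator also visits the
instructions inside each bundle. A minimal sketch of the difference (an
illustration only, not part of this patch):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"

    using namespace llvm;

    static void visitInstrs(MachineBasicBlock &MBB) {
      // Bundle iterator: bundled MIs are hidden behind their BUNDLE header,
      // which is why the fast RA's main loop never rewrote their operands.
      for (MachineInstr &MI : MBB)
        (void)MI;

      // Instruction iterator: every MI is visited, including bundled ones.
      for (MachineInstr &MI : MBB.instrs())
        (void)MI;
    }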
Added:
llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll
Modified:
llvm/lib/CodeGen/RegAllocFast.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 09c4674e4be6..d6c5e11fd0c5 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -105,6 +105,9 @@ namespace {
/// available in a physical register.
LiveRegMap LiveVirtRegs;
+ /// Stores assigned virtual registers present in the bundle MI.
+ DenseMap<Register, MCPhysReg> BundleVirtRegsMap;
+
DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
/// List of DBG_VALUE that we encountered without the vreg being assigned
/// because they were placed after the last use of the vreg.
@@ -218,6 +221,8 @@ namespace {
void allocateInstruction(MachineInstr &MI);
void handleDebugValue(MachineInstr &MI);
+ void handleBundle(MachineInstr &MI);
+
bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
@@ -889,6 +894,9 @@ void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
LRI->LiveOut = false;
LRI->Reloaded = false;
}
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ BundleVirtRegsMap[VirtReg] = PhysReg;
+ }
markRegUsedInInstr(PhysReg);
setPhysReg(MI, MO, PhysReg);
}
@@ -934,6 +942,10 @@ void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
}
LRI->LastUse = &MI;
+
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ BundleVirtRegsMap[VirtReg] = LRI->PhysReg;
+ }
markRegUsedInInstr(LRI->PhysReg);
setPhysReg(MI, MO, LRI->PhysReg);
}
@@ -1064,6 +1076,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// operands and early-clobbers.
UsedInInstr.clear();
+ BundleVirtRegsMap.clear();
// Scan for special cases; Apply pre-assigned register defs to state.
bool HasPhysRegUse = false;
@@ -1382,6 +1395,30 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
LiveDbgValueMap[Reg].push_back(&MI);
}
+void RegAllocFast::handleBundle(MachineInstr &MI) {
+ MachineBasicBlock::instr_iterator BundledMI = MI.getIterator();
+ ++BundledMI;
+ while (BundledMI->isBundledWithPred()) {
+ for (unsigned I = 0; I < BundledMI->getNumOperands(); ++I) {
+ MachineOperand &MO = BundledMI->getOperand(I);
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+
+ DenseMap<Register, MCPhysReg>::iterator DI;
+ DI = BundleVirtRegsMap.find(Reg);
+ assert(DI != BundleVirtRegsMap.end() && "Unassigned virtual register");
+
+ setPhysReg(MI, MO, DI->second);
+ }
+
+ ++BundledMI;
+ }
+}
+
void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
@@ -1411,6 +1448,12 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
}
allocateInstruction(MI);
+
+ // Once BUNDLE header is assigned registers, same assignments need to be
+ // done for bundled MIs.
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ handleBundle(MI);
+ }
}
LLVM_DEBUG(
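The new handleBundle() above rewrites the operands of every MI bundled with
the BUNDLE header, using the assignments recorded in BundleVirtRegsMap while
the header itself was allocated. The property this restores, which passes
such as SIInsertWaitcnts rely on, is that no virtual register survives
inside a bundle after regallocfast. A hypothetical checking helper, shown
only to illustrate that property and not part of this patch, could look like:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineOperand.h"

    using namespace llvm;

    // Hypothetical helper: verify that register allocation also rewrote the
    // register operands of instructions inside bundles.
    static bool bundlesFullyAllocated(const MachineBasicBlock &MBB) {
      for (const MachineInstr &MI : MBB.instrs()) { // visits bundled MIs too
        if (!MI.isBundledWithPred())
          continue; // only inspect instructions inside a bundle
        for (const MachineOperand &MO : MI.operands())
          if (MO.isReg() && MO.getReg().isVirtual())
            return false; // a bundled MI kept an unassigned virtual register
      }
      return true;
    }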
diff --git a/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir b/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir
new file mode 100644
index 000000000000..dde48a97f152
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass=regallocfast %s -o - | FileCheck -check-prefixes=GCN,XNACK,GCX9 %s
+
+---
+name: fast_regalloc_bundle_handling
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+ ; GCN-LABEL: name: fast_regalloc_bundle_handling
+ ; GCN: renamable $vgpr0 = IMPLICIT_DEF
+ ; GCN: renamable $vgpr1 = IMPLICIT_DEF
+ ; GCN: renamable $vgpr0 = BUNDLE implicit killed renamable $vgpr0, implicit killed renamable $vgpr1, implicit $exec {
+ ; GCN: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; GCN: }
+ ; GCN: S_ENDPGM 0, implicit killed renamable $vgpr0
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = BUNDLE implicit %0, implicit %1, implicit $exec {
+ %2 = V_ADD_U32_e32 %0, %1, implicit $exec
+ }
+ S_ENDPGM 0, implicit %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll
new file mode 100644
index 000000000000..267bcee2aa2e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s
+
+
+; MIR-LABEL: name: gws_barrier_offset0{{$}}
+; MIR: BUNDLE implicit{{( killed)?( renamable)?}} $vgpr0, implicit $m0, implicit $exec {
+; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, implicit $m0, implicit $exec :: (load 4 from custom "GWSResource")
+; MIR-NEXT: S_WAITCNT 0
+; MIR-NEXT: }
+define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 {
+ call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 0)
+ ret void
+}
+
+
+declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { convergent inaccessiblememonly nounwind }