[llvm] d45a247 - [AMDGPU] Don't remove VGPR to AGPR dead spills from frame info

Brendon Cahoon via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 23 09:12:34 PST 2021


Author: Brendon Cahoon
Date: 2021-12-23T11:09:19-06:00
New Revision: d45a2479989985937b016a4d66ffa1ed6c885613

URL: https://github.com/llvm/llvm-project/commit/d45a2479989985937b016a4d66ffa1ed6c885613
DIFF: https://github.com/llvm/llvm-project/commit/d45a2479989985937b016a4d66ffa1ed6c885613.diff

LOG: [AMDGPU] Don't remove VGPR to AGPR dead spills from frame info

Removing dead frame indices for VGPR to AGPR spills is incorrect
when the frame index is shared by multiple objects, which may
occur due to stack slot coloring. The problem is that subsequent
code that processes the other object will assert because the stack
frame index is marked dead.

Removing dead frame indices is needed prior to stack slot
coloring, which is what happens with SGPR to VGPR spills. These
spills are lowered prior to stack slot coloring, but the VGPR
to AGPR spills are processed afterwards during the Prolog/Epilog
Inserter pass. This patch marks the VGPR to AGPR spill slot as
dead if the slot is not used by another object.

Differential Revision: https://reviews.llvm.org/D115996

Added: 
    llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 4706c74be721d..d4fe74ecb96e6 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1167,11 +1167,13 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
   if (SpillVGPRToAGPR) {
     // To track the spill frame indices handled in this pass.
     BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
+    BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
 
     bool SeenDbgInstr = false;
 
     for (MachineBasicBlock &MBB : MF) {
       for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+        int FrameIndex;
         if (MI.isDebugInstr())
           SeenDbgInstr = true;
 
@@ -1191,10 +1193,18 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
             SpillFIs.set(FI);
             continue;
           }
-        }
+        } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
+                   TII->isLoadFromStackSlot(MI, FrameIndex))
+          NonVGPRSpillFIs.set(FrameIndex);
       }
     }
 
+    // Stack slot coloring may assign 
diff erent objets to the same stack slot.
+    // If not, then the VGPR to AGPR spill slot is dead.
+    for (unsigned FI : SpillFIs.set_bits())
+      if (!NonVGPRSpillFIs.test(FI))
+        FuncInfo->setVGPRToAGPRSpillDead(FI);
+
     for (MachineBasicBlock &MBB : MF) {
       for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
         MBB.addLiveIn(Reg);

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 6f92ac6dac45c..3ce368ef4db95 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -444,7 +444,7 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
       MFI.setStackID(i, TargetStackID::Default);
 
   for (auto &R : VGPRToAGPRSpills) {
-    if (R.second.FullyAllocated)
+    if (R.second.IsDead)
       MFI.RemoveStackObject(R.first);
   }
 }

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index c305bc20e40d4..8accbf611c5f5 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -465,6 +465,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   struct VGPRSpillToAGPR {
     SmallVector<MCPhysReg, 32> Lanes;
     bool FullyAllocated = false;
+    bool IsDead = false;
   };
 
   // Map WWM VGPR to a stack slot that is used to save/restore it in the
@@ -546,6 +547,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
                                          : I->second.Lanes[Lane];
   }
 
+  void setVGPRToAGPRSpillDead(int FrameIndex) {
+    auto I = VGPRToAGPRSpills.find(FrameIndex);
+    if (I != VGPRToAGPRSpills.end())
+      I->second.IsDead = true;
+  }
+
   bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                  unsigned NumLane) const;
   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);

diff  --git a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir
new file mode 100644
index 0000000000000..9cbdafe7f97cc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir
@@ -0,0 +1,88 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck %s
+
+---
+name: same_slot_agpr_sgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 }
+machineFunctionInfo:
+  hasSpilledVGPRs: true
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: same_slot_agpr_sgpr
+    ; CHECK: liveins: $agpr0, $agpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
+    ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF
+    ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5
+    ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr0 = IMPLICIT_DEF
+    SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    $sgpr4_sgpr5 = IMPLICIT_DEF
+    SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
+    S_ENDPGM 0
+...
+---
+name: 
diff _slot_agpr_sgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
+  - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 }
+machineFunctionInfo:
+  hasSpilledVGPRs: true
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: 
diff _slot_agpr_sgpr
+    ; CHECK: liveins: $agpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
+    ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF
+    ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
+    ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5
+    ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr0 = IMPLICIT_DEF
+    SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    $sgpr4_sgpr5 = IMPLICIT_DEF
+    SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
+    S_ENDPGM 0
+...
+---
+name: dead_vgpr_slot
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
+machineFunctionInfo:
+  hasSpilledVGPRs: true
+  stackPtrOffsetReg: $sgpr32
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: dead_vgpr_slot
+    ; CHECK: liveins: $agpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr0 = IMPLICIT_DEF
+    SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    S_ENDPGM 0
+...


        


More information about the llvm-commits mailing list