[llvm] 9145d75 - AMDGPU: Fix incorrectly deleting copies after spilling SGPR tuples
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 28 14:50:44 PDT 2020
Author: Matt Arsenault
Date: 2020-08-28T17:50:37-04:00
New Revision: 9145d75226a071c530d14e051008e4c32e87cf5e
URL: https://github.com/llvm/llvm-project/commit/9145d75226a071c530d14e051008e4c32e87cf5e
DIFF: https://github.com/llvm/llvm-project/commit/9145d75226a071c530d14e051008e4c32e87cf5e.diff
LOG: AMDGPU: Fix incorrectly deleting copies after spilling SGPR tuples
The implicit def of the super register would appear to kill any live
uses of components before the spill, and would be deleted by
MachineCopyPropagation. We need to add implicit uses of the super
register, similarly to what copyPhysReg does. VGPR tuples appear to be
correctly handled already. I need to double check the SGPR->memory
path.
Added:
llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
Modified:
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/spill192.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index fd8336458a0a..8a9899988b4c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -999,7 +999,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MBB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- DenseSet<Register> SGPRSpillVGPRDefinedSet;
+ DenseSet<Register> SGPRSpillVGPRDefinedSet; // FIXME: This should be removed
ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
= MFI->getSGPRToVGPRSpills(Index);
@@ -1032,6 +1032,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
+ bool UseKill = IsKill && i == NumSubRegs - 1;
+
// During SGPR spilling to VGPR, determine if the VGPR is defined. The
// only circumstance in which we say it is undefined is when it is the
// first spill to this VGPR in the first basic block.
@@ -1044,7 +1046,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
auto MIB = BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
Spill.VGPR)
- .addReg(SubReg, getKillRegState(IsKill))
+ .addReg(SubReg, getKillRegState(UseKill))
.addImm(Spill.Lane)
.addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
@@ -1054,6 +1056,9 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
MIB.addReg(SuperReg, RegState::ImplicitDefine);
}
+ if (NumSubRegs > 1)
+ MIB.addReg(SuperReg, getKillRegState(UseKill) | RegState::Implicit);
+
// FIXME: Since this spills to another register instead of an actual
// frame index, we should delete the frame index when all references to
// it are fixed.
diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
new file mode 100644
index 000000000000..b0bcdb45f511
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
@@ -0,0 +1,119 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-lower-sgpr-spills,prologepilog,machine-cp -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+# Make sure the initial first $sgpr1 = COPY $sgpr2 copy is not deleted
+# by the copy propagation after lowering the spill.
+
+---
+name: spill_sgpr128_use_subreg
+tracksRegLiveness: true
+machineFunctionInfo:
+ hasSpilledSGPRs: true
+ scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103
+ stackPtrOffsetReg: $sgpr32
+
+stack:
+ - { id: 0, stack-id: sgpr-spill, type: spill-slot, size: 16, alignment: 4 }
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; GCN-LABEL: name: spill_sgpr128_use_subreg
+ ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GCN: renamable $sgpr1 = COPY $sgpr2
+ ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: renamable $sgpr8 = COPY killed renamable $sgpr1
+ ; GCN: S_ENDPGM 0, implicit $sgpr8
+ renamable $sgpr1 = COPY $sgpr2
+ SI_SPILL_S128_SAVE renamable $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr100_sgpr101_sgpr102_sgpr103, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5)
+ renamable $sgpr8 = COPY killed renamable $sgpr1
+ S_ENDPGM 0, implicit $sgpr8
+...
+
+---
+name: spill_sgpr128_use_kill
+tracksRegLiveness: true
+machineFunctionInfo:
+ hasSpilledSGPRs: true
+ scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103
+ stackPtrOffsetReg: $sgpr32
+
+stack:
+ - { id: 0, stack-id: sgpr-spill, type: spill-slot, size: 16, alignment: 4 }
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; GCN-LABEL: name: spill_sgpr128_use_kill
+ ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GCN: renamable $sgpr1 = COPY $sgpr2
+ ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr3, 3, $vgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: S_ENDPGM 0
+ renamable $sgpr1 = COPY $sgpr2
+ SI_SPILL_S128_SAVE renamable killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr100_sgpr101_sgpr102_sgpr103, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5)
+ S_ENDPGM 0
+...
+
+---
+name: spill_vgpr128_use_subreg
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103
+ stackPtrOffsetReg: $sgpr32
+
+stack:
+ - { id: 0, type: spill-slot, size: 16, alignment: 4 }
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+
+ ; GCN-LABEL: name: spill_vgpr128_use_subreg
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; GCN: renamable $vgpr1 = COPY $vgpr2
+ ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; GCN: renamable $vgpr8 = COPY $vgpr2
+ ; GCN: S_ENDPGM 0, implicit $vgpr8
+ renamable $vgpr1 = COPY $vgpr2
+ SI_SPILL_V128_SAVE renamable $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5)
+ renamable $vgpr8 = COPY killed renamable $vgpr1
+ S_ENDPGM 0, implicit $vgpr8
+...
+
+---
+name: spill_vgpr128_use_kill
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103
+ stackPtrOffsetReg: $sgpr32
+
+stack:
+ - { id: 0, type: spill-slot, size: 16, alignment: 4 }
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+
+ ; GCN-LABEL: name: spill_vgpr128_use_kill
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; GCN: renamable $vgpr1 = COPY $vgpr2
+ ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ ; GCN: S_ENDPGM 0
+ renamable $vgpr1 = COPY $vgpr2
+ SI_SPILL_V128_SAVE renamable killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5)
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir
index 26f727d034d4..25a2d6ebb006 100644
--- a/llvm/test/CodeGen/AMDGPU/spill192.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill192.mir
@@ -30,12 +30,12 @@ body: |
; EXPANDED: successors: %bb.1(0x80000000)
; EXPANDED: liveins: $vgpr0
; EXPANDED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
- ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr4, 0, undef $vgpr0
- ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr5, 1, $vgpr0
- ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr6, 2, $vgpr0
- ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr7, 3, $vgpr0
- ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr8, 4, $vgpr0
- ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr9, 5, $vgpr0
+ ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+ ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+ ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+ ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+ ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+ ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr9, 5, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; EXPANDED: bb.1:
; EXPANDED: successors: %bb.2(0x80000000)
More information about the llvm-commits
mailing list