[llvm-branch-commits] [llvm] AMDGPU: Delete spills of undef values (PR #119684)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Dec 12 02:46:53 PST 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/119684
>From 03cca4cf3b65cb6988db7a983f3c3349fa0b390a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 28 Mar 2022 14:11:28 -0400
Subject: [PATCH 1/3] [Statepoint] Treat undef operands less specially
This reverts commit f7443905af1e06eaacda1e437fff8d54dc89c487.
This is to avoid an assertion if an undef operand appears in a
stackmap. This is important to avoid hitting verifier errors
when register allocation starts adding undefs in error scenarios.
Rather than trying to treat undef operands as special, leave them
alone and avoid producing an invalid spill. It would be a bit more
precise to produce a spill of an undef register here, but that's not
exposed through the storeRegToStackSlot API.
https://reviews.llvm.org/D122605
This was an alternative to https://reviews.llvm.org/D122582
---
.../CodeGen/FixupStatepointCallerSaved.cpp | 2 -
llvm/lib/CodeGen/StackMaps.cpp | 6 --
.../X86/stackmap-undef-operand-anyregcc.mir | 61 +++++++++++++++++++
.../CodeGen/X86/statepoint-fixup-undef.mir | 8 +--
4 files changed, 65 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/stackmap-undef-operand-anyregcc.mir
diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index 3bb9da5f1a37bb..0ebe845e473fd6 100644
--- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -381,8 +381,6 @@ class StatepointState {
EndIdx = MI.getNumOperands();
Idx < EndIdx; ++Idx) {
MachineOperand &MO = MI.getOperand(Idx);
- // Leave `undef` operands as is, StackMaps will rewrite them
- // into a constant.
if (!MO.isReg() || MO.isImplicit() || MO.isUndef())
continue;
Register Reg = MO.getReg();
diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp
index 81b288df3b07e0..7480963c1f5217 100644
--- a/llvm/lib/CodeGen/StackMaps.cpp
+++ b/llvm/lib/CodeGen/StackMaps.cpp
@@ -268,12 +268,6 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
if (MOI->isImplicit())
return ++MOI;
- if (MOI->isUndef()) {
- // Record `undef` register as constant. Use same value as ISel uses.
- Locs.emplace_back(Location::Constant, sizeof(int64_t), 0, 0xFEFEFEFE);
- return ++MOI;
- }
-
assert(MOI->getReg().isPhysical() &&
"Virtreg operands should have been rewritten before now.");
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg());
diff --git a/llvm/test/CodeGen/X86/stackmap-undef-operand-anyregcc.mir b/llvm/test/CodeGen/X86/stackmap-undef-operand-anyregcc.mir
new file mode 100644
index 00000000000000..6a322eb105e6f7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stackmap-undef-operand-anyregcc.mir
@@ -0,0 +1,61 @@
+# RUN: llc -mtriple=x86_64-apple-darwin -start-after=virtregrewriter -o - %s | FileCheck %s
+
+# Check there's no assertion for anyregcc with an undef operand to a stackmap.
+
+# CHECK: __LLVM_StackMaps:
+# CHECK-NEXT: .byte 3
+# CHECK-NEXT: .byte 0
+# CHECK-NEXT: .short 0
+# CHECK-NEXT: .long 1
+# CHECK-NEXT: .long 0
+# CHECK-NEXT: .long 1
+# CHECK-NEXT: .quad _undef_anyregcc_patchpoint
+# CHECK-NEXT: .quad 8
+# CHECK-NEXT: .quad 1
+# CHECK-NEXT: .quad 12
+# CHECK-NEXT: .long Ltmp0-_undef_anyregcc_patchpoint
+# CHECK-NEXT: .short 0
+# CHECK-NEXT: .short 2
+# CHECK-NEXT: .byte 1
+# CHECK-NEXT: .byte 0
+# CHECK-NEXT: .short 8
+# CHECK-NEXT: .short 0
+# CHECK-NEXT: .short 0
+# CHECK-NEXT: .long 0
+# CHECK-NEXT: .byte 1
+# CHECK-NEXT: .byte 0
+# CHECK-NEXT: .short 8
+# CHECK-NEXT: .short 0
+# CHECK-NEXT: .short 0
+# CHECK-NEXT: .long 0
+# CHECK-NEXT: .p2align 3
+# CHECK-NEXT: .short 0
+# CHECK-NEXT: .short 2
+# CHECK-NEXT: .short 0
+# CHECK-NEXT: .byte 0
+# CHECK-NEXT: .byte 8
+# CHECK-NEXT: .short 7
+# CHECK-NEXT: .byte 0
+# CHECK-NEXT: .byte 8
+# CHECK-NEXT: .p2align 3
+---
+name: undef_anyregcc_patchpoint
+tracksRegLiveness: true
+frameInfo:
+ hasPatchPoint: true
+ hasCalls: true
+fixedStack:
+ - { id: 0, type: default, offset: 72, size: 8, alignment: 8, stack-id: default,
+ isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+body: |
+ bb.0:
+ liveins: $rcx, $rdi, $rdx, $rsi, $r8, $r9
+
+ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ dead renamable $rax = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0)
+ renamable $rax = PATCHPOINT 12, 15, 0, 1, 13, undef renamable $rax, csr_64_allregs, implicit-def dead early-clobber $r11, implicit-def $rsp, implicit-def $ssp
+ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ RET 0, $rax
+
+...
diff --git a/llvm/test/CodeGen/X86/statepoint-fixup-undef.mir b/llvm/test/CodeGen/X86/statepoint-fixup-undef.mir
index 4a18351bde493d..edb0d517d5d52f 100644
--- a/llvm/test/CodeGen/X86/statepoint-fixup-undef.mir
+++ b/llvm/test/CodeGen/X86/statepoint-fixup-undef.mir
@@ -181,13 +181,13 @@ body: |
; STACKMAP-NEXT: .short 3
; STACKMAP-NEXT: .short 0
; STACKMAP-NEXT: .long 0
- ; This is a constant 0xFEFEFEFE we are looking for
- ; STACKMAP-NEXT: .byte 4
+ ; This is the entry we're looking for
+ ; STACKMAP-NEXT: .byte 1
; STACKMAP-NEXT: .byte 0
- ; STACKMAP-NEXT: .short 8
+ ; STACKMAP-NEXT: .short 4
; STACKMAP-NEXT: .short 0
; STACKMAP-NEXT: .short 0
- ; STACKMAP-NEXT: .long -16843010
+ ; STACKMAP-NEXT: .long 0
; STACKMAP-NEXT: .byte 4
; STACKMAP-NEXT: .byte 0
; STACKMAP-NEXT: .short 8
>From f175e87d783cd62189ba3ba5a462bda5de96818a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 28 Mar 2022 11:24:48 -0400
Subject: [PATCH 2/3] AMDGPU: Delete spills of undef values
It would be a bit more logical to preserve the undef and do the normal
expansion, but this is less work. This avoids verifier errors in a
future patch which starts deleting liveness from registers after
allocation failures, which results in spills of undef values.
https://reviews.llvm.org/D122607
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 12 ++++++
.../AMDGPU/sgpr-spill-partially-undef.mir | 42 +++++++++++++++++++
.../AMDGPU/spill-agpr-partially-undef.mir | 34 +++++++++++++++
llvm/test/CodeGen/AMDGPU/vgpr-spill.mir | 34 +++++++++++++++
4 files changed, 122 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 296c32fa4e0d09..4f8c5c6756b3bb 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1954,6 +1954,13 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
RegScavenger *RS, SlotIndexes *Indexes,
LiveIntervals *LIS, bool OnlyToVGPR,
bool SpillToPhysVGPRLane) const {
+ if (MI->getOperand(0).isUndef()) {
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ return true;
+ }
+
SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
ArrayRef<SpilledReg> VGPRSpills =
@@ -2375,6 +2382,11 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata);
+ if (VData->isUndef()) {
+ MI->eraseFromParent();
+ return true;
+ }
+
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
MFI->getStackPtrOffsetReg());
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
index 774785fb3966fc..d352e8a13da9f1 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
@@ -54,3 +54,45 @@ body: |
SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
...
+
+---
+name: sgpr_spill_s32_undef
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ hasSpilledSGPRs: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+stack:
+ - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: sgpr_spill_s32_undef
+ ; CHECK: body:
+ ; CHECK-NEXT: bb.0:
+ ; CHECK-NOT: {{.+}}
+ ; CHECK: ...
+ SI_SPILL_S32_SAVE undef $sgpr8, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s32) into %stack.0, align 4, addrspace 5)
+
+...
+
+---
+name: sgpr_spill_s64_undef
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ hasSpilledSGPRs: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+stack:
+ - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: sgpr_spill_s64_undef
+ ; CHECK: body:
+ ; CHECK-NEXT: bb.0:
+ ; CHECK-NOT: {{.+}}
+ ; CHECK: ...
+ SI_SPILL_S64_SAVE undef $sgpr8_sgpr9, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
index c825674de7652c..b02b6e79d7a76f 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
@@ -71,3 +71,37 @@ body: |
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
...
+
+---
+name: spill_a32_undef
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ frameOffsetReg: '$sgpr33'
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: spill_a32_undef
+ ; CHECK: S_ENDPGM 0
+ SI_SPILL_A32_SAVE undef $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ S_ENDPGM 0
+...
+
+---
+name: spill_a64_undef
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ frameOffsetReg: '$sgpr33'
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: spill_a64_undef
+ ; CHECK: S_ENDPGM 0
+ SI_SPILL_A64_SAVE undef $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
index c2badc5720f149..edea344a66a3cd 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
@@ -153,3 +153,37 @@ body: |
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5)
...
+
+---
+name: spill_v32_undef
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ frameOffsetReg: '$sgpr33'
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: spill_v32_undef
+ ; CHECK: S_NOP 0, implicit undef $vgpr0
+ SI_SPILL_V32_SAVE undef $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ S_NOP 0, implicit undef $vgpr0
+...
+
+---
+name: spill_v64_undef
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ frameOffsetReg: '$sgpr33'
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: spill_v64_undef
+ ; CHECK: S_NOP 0, implicit undef $vgpr0_vgpr1
+ SI_SPILL_V64_SAVE undef $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
+ S_NOP 0, implicit undef $vgpr0_vgpr1
+...
>From 2bd0046adb0bdc1b7ae32ba653df59913fe72612 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 12 Dec 2024 14:14:29 +0900
Subject: [PATCH 3/3] Move where undef sgpr spills are deleted
---
llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 7 +++++++
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 8 ++------
2 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index f868843318ff6c..d27c523425feb2 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -422,6 +422,13 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) {
if (!TII->isSGPRSpill(MI))
continue;
+ if (MI.getOperand(0).isUndef()) {
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+ continue;
+ }
+
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 4f8c5c6756b3bb..8a23cf5e214967 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1954,12 +1954,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
RegScavenger *RS, SlotIndexes *Indexes,
LiveIntervals *LIS, bool OnlyToVGPR,
bool SpillToPhysVGPRLane) const {
- if (MI->getOperand(0).isUndef()) {
- if (Indexes)
- Indexes->removeMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
- return true;
- }
+ assert(!MI->getOperand(0).isUndef() &&
+ "undef spill should have been deleted earlier");
SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
More information about the llvm-branch-commits
mailing list