[llvm] 370aa2f - InlineSpiller: Don't fold spills into undef reads
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 22 18:04:31 PDT 2022
Author: Matt Arsenault
Date: 2022-06-22T20:47:55-04:00
New Revision: 370aa2f88ffabae5831bbc350c03d7dcc757580b
URL: https://github.com/llvm/llvm-project/commit/370aa2f88ffabae5831bbc350c03d7dcc757580b
DIFF: https://github.com/llvm/llvm-project/commit/370aa2f88ffabae5831bbc350c03d7dcc757580b.diff
LOG: InlineSpiller: Don't fold spills into undef reads
This was producing a load into a dead register, which was a verifier
error ("Instruction loads from dead spill slot").
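
For context, here is a minimal standalone sketch of the new guard, mirroring
the check in the diff below. The helper name and its packaging as a separate
function are illustrative only; in the actual patch the check is written
inline in InlineSpiller::foldMemoryOperand.

#include "llvm/CodeGen/MachineOperand.h"

// Sketch only (hypothetical helper, not part of the patch): true for a
// non-tied use operand that does not actually read its register, i.e. an
// undef read. Folding such an operand would materialize a load from a spill
// slot that was never written, which the machine verifier rejects
// ("Instruction loads from dead spill slot").
static bool isUndefNonTiedUse(const llvm::MachineOperand &MO) {
  return MO.isUse() && !MO.readsReg() && !MO.isTied();
}

In foldMemoryOperand, operands matching this predicate are simply skipped
(continue), so no restore is emitted for an undef read.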
Added:
llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir
Modified:
llvm/lib/CodeGen/InlineSpiller.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 4cf7c20fe2bde..06c660807c5c5 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -838,6 +838,13 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
unsigned Idx = OpPair.second;
assert(MI == OpPair.first && "Instruction conflict during operand folding");
MachineOperand &MO = MI->getOperand(Idx);
+
+ // No point restoring an undef read, and we'll produce an invalid live
+ // interval.
+ // TODO: Is this really the correct way to handle undef tied uses?
+ if (MO.isUse() && !MO.readsReg() && !MO.isTied())
+ continue;
+
if (MO.isImplicit()) {
ImpReg = MO.getReg();
continue;
diff --git a/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir b/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir
new file mode 100644
index 0000000000000..3616d617f84a0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir
@@ -0,0 +1,92 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -stress-regalloc=4 -verify-regalloc -start-before=greedy,0 -stop-after=virtregrewriter,0 %s -o - | FileCheck %s
+
+# Check that we don't generate *** Bad machine code: Instruction loads
+# from dead spill slot ***
+
+
+---
+name: restore_undef_copy_use
+tracksRegLiveness: true
+machineFunctionInfo:
+ maxKernArgAlign: 1
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 8
+body: |
+ ; CHECK-LABEL: name: restore_undef_copy_use
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 undef renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; CHECK-NEXT: $sgpr6_sgpr7 = KILL undef renamable $sgpr6_sgpr7
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr10_sgpr11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr10_sgpr11, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; CHECK-NEXT: $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+ %0:sgpr_64 = COPY $sgpr10_sgpr11
+ S_CBRANCH_EXECZ %bb.3, implicit $exec
+
+ bb.1:
+ %1:sreg_64 = S_OR_SAVEEXEC_B64 undef %2:sreg_64, implicit-def $exec, implicit-def $scc, implicit $exec
+ $exec = S_XOR_B64_term $exec, %1, implicit-def $scc
+ S_CBRANCH_EXECZ %bb.5, implicit $exec
+
+ bb.2:
+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; A restore for %0 should not be inserted here.
+ $sgpr6_sgpr7 = COPY undef %0
+ dead $sgpr30_sgpr31 = SI_CALL undef %1, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ S_BRANCH %bb.5
+
+ bb.3:
+
+ bb.4:
+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ dead $sgpr30_sgpr31 = SI_CALL undef %3:sreg_64_xexec, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ $sgpr4_sgpr5 = COPY %0
+ dead $sgpr30_sgpr31 = SI_CALL undef %3:sreg_64_xexec, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+
+ bb.5:
+
+...
+