[llvm] 370aa2f - InlineSpiller: Don't fold spills into undef reads

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 22 18:04:31 PDT 2022


Author: Matt Arsenault
Date: 2022-06-22T20:47:55-04:00
New Revision: 370aa2f88ffabae5831bbc350c03d7dcc757580b

URL: https://github.com/llvm/llvm-project/commit/370aa2f88ffabae5831bbc350c03d7dcc757580b
DIFF: https://github.com/llvm/llvm-project/commit/370aa2f88ffabae5831bbc350c03d7dcc757580b.diff

LOG: InlineSpiller: Don't fold spills into undef reads

Folding a restore into an undef read was producing a load into a dead
register, which was a verifier error.

Added: 
    llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir

Modified: 
    llvm/lib/CodeGen/InlineSpiller.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 4cf7c20fe2bde..06c660807c5c5 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -838,6 +838,13 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
     unsigned Idx = OpPair.second;
     assert(MI == OpPair.first && "Instruction conflict during operand folding");
     MachineOperand &MO = MI->getOperand(Idx);
+
+    // No point restoring an undef read; doing so would also produce an invalid
+    // live interval.
+    // TODO: Is this really the correct way to handle undef tied uses?
+    if (MO.isUse() && !MO.readsReg() && !MO.isTied())
+      continue;
+
     if (MO.isImplicit()) {
       ImpReg = MO.getReg();
       continue;

diff  --git a/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir b/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir
new file mode 100644
index 0000000000000..3616d617f84a0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir
@@ -0,0 +1,92 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -stress-regalloc=4 -verify-regalloc -start-before=greedy,0 -stop-after=virtregrewriter,0 %s -o - | FileCheck %s
+
+# Check that we don't generate *** Bad machine code: Instruction loads
+# from dead spill slot ***
+
+
+---
+name:            restore_undef_copy_use
+tracksRegLiveness: true
+machineFunctionInfo:
+  maxKernArgAlign: 1
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy:       8
+body:             |
+  ; CHECK-LABEL: name: restore_undef_copy_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.5(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 undef renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK-NEXT:   $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = KILL undef renamable $sgpr6_sgpr7
+  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; CHECK-NEXT:   S_BRANCH %bb.5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr10_sgpr11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   SI_SPILL_S64_SAVE killed renamable $sgpr10_sgpr11, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  bb.0:
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+    %0:sgpr_64 = COPY $sgpr10_sgpr11
+    S_CBRANCH_EXECZ %bb.3, implicit $exec
+
+  bb.1:
+    %1:sreg_64 = S_OR_SAVEEXEC_B64 undef %2:sreg_64, implicit-def $exec, implicit-def $scc, implicit $exec
+    $exec = S_XOR_B64_term $exec, %1, implicit-def $scc
+    S_CBRANCH_EXECZ %bb.5, implicit $exec
+
+  bb.2:
+    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    ; A restore for %0 should not be inserted here.
+    $sgpr6_sgpr7 = COPY undef %0
+    dead $sgpr30_sgpr31 = SI_CALL undef %1, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    S_BRANCH %bb.5
+
+  bb.3:
+
+  bb.4:
+    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    dead $sgpr30_sgpr31 = SI_CALL undef %3:sreg_64_xexec, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    $sgpr4_sgpr5 = COPY %0
+    dead $sgpr30_sgpr31 = SI_CALL undef %3:sreg_64_xexec, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+
+  bb.5:
+
+...
+


        


More information about the llvm-commits mailing list