[llvm] 663f166 - AMDGPU: Fix verifier error on killed spill of partially undef register

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 15 06:59:25 PDT 2020


Author: Matt Arsenault
Date: 2020-10-15T09:45:44-04:00
New Revision: 663f16684d1a5f129874b7be9e1f486682977bfa

URL: https://github.com/llvm/llvm-project/commit/663f16684d1a5f129874b7be9e1f486682977bfa
DIFF: https://github.com/llvm/llvm-project/commit/663f16684d1a5f129874b7be9e1f486682977bfa.diff

LOG: AMDGPU: Fix verifier error on killed spill of partially undef register

This does unfortunately end up with extra waitcnts getting inserted
that were avoided before. Ideally we would avoid the spills of these
undef components in the first place.

Added: 
    llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
    llvm/test/CodeGen/AMDGPU/vgpr-spill.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
    llvm/test/CodeGen/AMDGPU/spill-agpr.mir
    llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 7b1970f45a6c..9782e11a370d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -799,20 +799,29 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
       SrcDstRegState |= getKillRegState(IsKill);
     }
 
+    // Make sure the whole register is defined if there are undef components by
+    // adding an implicit def of the super-reg on the first instruction.
+    const bool NeedSuperRegDef = NumSubRegs > 1 && IsStore && i == 0;
+
     auto MIB = spillVGPRtoAGPR(ST, MI, Index, i, SubReg, IsKill);
 
     if (!MIB.getInstr()) {
       unsigned FinalReg = SubReg;
-      if (hasAGPRs(RC)) {
+
+      const bool IsAGPR = hasAGPRs(RC);
+      if (IsAGPR) {
         if (!TmpReg) {
           assert(RS && "Needs to have RegScavenger to spill an AGPR!");
           // FIXME: change to scavengeRegisterBackwards()
           TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
           RS->setRegUsed(TmpReg);
         }
-        if (IsStore)
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
+        if (IsStore) {
+          auto AccRead = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
             .addReg(SubReg, getKillRegState(IsKill));
+          if (NeedSuperRegDef)
+            AccRead.addReg(ValueReg, RegState::ImplicitDefine);
+        }
         SubReg = TmpReg;
       }
 
@@ -838,14 +847,21 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
           .addImm(0) // swz
           .addMemOperand(NewMMO);
 
+      if (!IsAGPR && NeedSuperRegDef)
+        MIB.addReg(ValueReg, RegState::ImplicitDefine);
+
       if (!IsStore && TmpReg != AMDGPU::NoRegister)
         MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
                       FinalReg)
           .addReg(TmpReg, RegState::Kill);
+    } else {
+      if (NeedSuperRegDef)
+        MIB.addReg(ValueReg, RegState::ImplicitDefine);
     }
 
-    if (NumSubRegs > 1)
+    if (NumSubRegs > 1) {
       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
+    }
   }
 
   if (ScratchOffsetRegDelta != 0) {

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
index 4f9668f8d369..6fd99b8406d7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
@@ -59,6 +59,7 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT:    s_sub_u32 s32, s32, 0x10000
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
@@ -78,6 +79,7 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT:    global_load_dwordx4 v[43:46], v[59:60], off
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
@@ -342,6 +344,7 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT:    s_sub_u32 s32, s32, 0x10000
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
@@ -361,6 +364,7 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT:    global_load_dwordx4 v[43:46], v[59:60], off
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
@@ -630,6 +634,7 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT:    s_sub_u32 s32, s32, 0x10000
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
@@ -649,6 +654,7 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
 ; GCN-NEXT:    global_load_dwordx4 v[43:46], v[59:60], off
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
+; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
new file mode 100644
index 000000000000..bdb4d042b452
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=CHECK -check-prefix=GFX908 %s
+
+---
+name: spill_a64_kill
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+body:             |
+  bb.0:
+    liveins: $agpr0_agpr1
+
+    ; CHECK-LABEL: name: spill_a64_kill
+    ; CHECK: liveins: $agpr0_agpr1
+    ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
+    ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+    SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
+...
+
+# Make sure there's no verifier error on the undef spill component when the value is killed.
+
+---
+name: spill_a64_undef_sub1_killed
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+body:             |
+  bb.0:
+    liveins: $agpr0
+
+    ; CHECK-LABEL: name: spill_a64_undef_sub1_killed
+    ; CHECK: liveins: $agpr0
+    ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
+    ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+    SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
+...
+
+---
+name: spill_a64_undef_sub0_killed
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+body:             |
+  bb.0:
+    liveins: $agpr1
+
+    ; CHECK-LABEL: name: spill_a64_undef_sub0_killed
+    ; CHECK: liveins: $agpr1
+    ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5)
+    ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+    SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
index c817b977eb9d..6d5af02301e2 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
@@ -78,7 +78,7 @@ body: |
   ; EXPANDED:   successors: %bb.1(0x80000000)
   ; EXPANDED:   liveins: $vgpr0, $vgpr1
   ; EXPANDED:   S_NOP 0, implicit-def renamable $agpr0_agpr1
-  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1
+  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
   ; EXPANDED:   $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1
   ; EXPANDED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED: bb.1:
@@ -220,7 +220,7 @@ body: |
   ; EXPANDED:   successors: %bb.1(0x80000000)
   ; EXPANDED:   liveins: $vgpr0, $vgpr1, $vgpr2
   ; EXPANDED:   S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2
-  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2
+  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
   ; EXPANDED:   $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2
   ; EXPANDED:   $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2
   ; EXPANDED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
@@ -269,7 +269,7 @@ body: |
   ; EXPANDED:   successors: %bb.1(0x80000000)
   ; EXPANDED:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
   ; EXPANDED:   S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3
-  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
+  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
   ; EXPANDED:   $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
   ; EXPANDED:   $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
   ; EXPANDED:   $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3
@@ -320,7 +320,7 @@ body: |
   ; EXPANDED:   successors: %bb.1(0x80000000)
   ; EXPANDED:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
   ; EXPANDED:   S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4
-  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
+  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
   ; EXPANDED:   $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
   ; EXPANDED:   $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
   ; EXPANDED:   $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
@@ -373,7 +373,7 @@ body: |
   ; EXPANDED:   successors: %bb.1(0x80000000)
   ; EXPANDED:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
   ; EXPANDED:   S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
+  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
   ; EXPANDED:   $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
   ; EXPANDED:   $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
   ; EXPANDED:   $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
@@ -428,7 +428,7 @@ body: |
   ; EXPANDED:   successors: %bb.1(0x80000000)
   ; EXPANDED:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
   ; EXPANDED:   S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
   ; EXPANDED:   $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
   ; EXPANDED:   $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
   ; EXPANDED:   $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
@@ -487,7 +487,7 @@ body: |
   ; EXPANDED:   successors: %bb.1(0x80000000)
   ; EXPANDED:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
   ; EXPANDED:   S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
   ; EXPANDED:   $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
   ; EXPANDED:   $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
   ; EXPANDED:   $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
@@ -562,7 +562,7 @@ body: |
   ; EXPANDED:   successors: %bb.1(0x80000000)
   ; EXPANDED:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
   ; EXPANDED:   S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+  ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
   ; EXPANDED:   $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
   ; EXPANDED:   $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
   ; EXPANDED:   $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
index b0bcdb45f511..d95ee977615a 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
@@ -79,11 +79,11 @@ body:             |
     ; GCN-LABEL: name: spill_vgpr128_use_subreg
     ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
     ; GCN: renamable $vgpr1 = COPY $vgpr2
-    ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
+    ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
     ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
     ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
     ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
-    ; GCN: renamable $vgpr8 = COPY $vgpr2
+    ; GCN: renamable $vgpr8 = COPY killed renamable $vgpr1
     ; GCN: S_ENDPGM 0, implicit $vgpr8
     renamable $vgpr1 = COPY $vgpr2
     SI_SPILL_V128_SAVE renamable $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5)
@@ -108,7 +108,7 @@ body:             |
     ; GCN-LABEL: name: spill_vgpr128_use_kill
     ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
     ; GCN: renamable $vgpr1 = COPY $vgpr2
-    ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
+    ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
     ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
     ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
     ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)

diff  --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
new file mode 100644
index 000000000000..240f6778bff8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
@@ -0,0 +1,126 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=CHECK -check-prefix=GCN64 %s
+
+---
+name: spill_v32
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: spill_v32
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+    ; CHECK: S_NOP 0, implicit $vgpr0
+    SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+    S_NOP 0, implicit $vgpr0
+...
+
+---
+name: spill_v32_kill
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: spill_v32_kill
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+    SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+...
+
+---
+name: spill_v64
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: spill_v64
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+    ; CHECK: S_NOP 0, implicit $vgpr0_vgpr1
+    SI_SPILL_V64_SAVE $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
+    S_NOP 0, implicit $vgpr0_vgpr1
+...
+
+---
+name: spill_v64_kill
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: spill_v64_kill
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+    SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
+...
+
+# Make sure there's no verifier error on the undef spill component when the value is killed.
+
+---
+name: spill_v64_undef_sub1_killed
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: spill_v64_undef_sub1_killed
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+    SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
+...
+
+---
+name: spill_v64_undef_sub0_killed
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+body:             |
+  bb.0:
+    liveins: $vgpr1
+
+    ; CHECK-LABEL: name: spill_v64_undef_sub0_killed
+    ; CHECK: liveins: $vgpr1
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
+    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
+    SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
+...


        


More information about the llvm-commits mailing list