[llvm] f38a8b5 - [AMDGPU] Fix 224-bit spills

Piotr Sobczak via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 29 08:53:08 PDT 2021


Author: Piotr Sobczak
Date: 2021-06-29T17:52:16+02:00
New Revision: f38a8b54ea31d6c915e2d6e259ebc083883e6f6d

URL: https://github.com/llvm/llvm-project/commit/f38a8b54ea31d6c915e2d6e259ebc083883e6f6d
DIFF: https://github.com/llvm/llvm-project/commit/f38a8b54ea31d6c915e2d6e259ebc083883e6f6d.diff

LOG: [AMDGPU] Fix 224-bit spills

Related to D104622.

Differential Revision: https://reviews.llvm.org/D105109

Added: 
    llvm/test/CodeGen/AMDGPU/spill224.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 358abec1e06c..4084619240c5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1394,6 +1394,8 @@ static unsigned getAGPRSpillSaveOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_A160_SAVE;
   case 24:
     return AMDGPU::SI_SPILL_A192_SAVE;
+  case 28:
+    return AMDGPU::SI_SPILL_A224_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_A256_SAVE;
   case 64:
@@ -1531,6 +1533,8 @@ static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_A160_RESTORE;
   case 24:
     return AMDGPU::SI_SPILL_A192_RESTORE;
+  case 28:
+    return AMDGPU::SI_SPILL_A224_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_A256_RESTORE;
   case 64:

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index d44c8c48a246..7fd0765cbbe6 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -814,6 +814,13 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   case AMDGPU::SI_SPILL_A256_SAVE:
   case AMDGPU::SI_SPILL_A256_RESTORE:
     return 8;
+  case AMDGPU::SI_SPILL_S224_SAVE:
+  case AMDGPU::SI_SPILL_S224_RESTORE:
+  case AMDGPU::SI_SPILL_V224_SAVE:
+  case AMDGPU::SI_SPILL_V224_RESTORE:
+  case AMDGPU::SI_SPILL_A224_SAVE:
+  case AMDGPU::SI_SPILL_A224_RESTORE:
+    return 7;
   case AMDGPU::SI_SPILL_S192_SAVE:
   case AMDGPU::SI_SPILL_S192_RESTORE:
   case AMDGPU::SI_SPILL_V192_SAVE:
@@ -1473,6 +1480,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
   case AMDGPU::SI_SPILL_S1024_SAVE:
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S224_SAVE:
   case AMDGPU::SI_SPILL_S192_SAVE:
   case AMDGPU::SI_SPILL_S160_SAVE:
   case AMDGPU::SI_SPILL_S128_SAVE:
@@ -1483,6 +1491,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
   case AMDGPU::SI_SPILL_S1024_RESTORE:
   case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S224_RESTORE:
   case AMDGPU::SI_SPILL_S192_RESTORE:
   case AMDGPU::SI_SPILL_S160_RESTORE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
@@ -1519,6 +1528,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_S1024_SAVE:
     case AMDGPU::SI_SPILL_S512_SAVE:
     case AMDGPU::SI_SPILL_S256_SAVE:
+    case AMDGPU::SI_SPILL_S224_SAVE:
     case AMDGPU::SI_SPILL_S192_SAVE:
     case AMDGPU::SI_SPILL_S160_SAVE:
     case AMDGPU::SI_SPILL_S128_SAVE:
@@ -1533,6 +1543,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_S1024_RESTORE:
     case AMDGPU::SI_SPILL_S512_RESTORE:
     case AMDGPU::SI_SPILL_S256_RESTORE:
+    case AMDGPU::SI_SPILL_S224_RESTORE:
     case AMDGPU::SI_SPILL_S192_RESTORE:
     case AMDGPU::SI_SPILL_S160_RESTORE:
     case AMDGPU::SI_SPILL_S128_RESTORE:
@@ -1547,6 +1558,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V1024_SAVE:
     case AMDGPU::SI_SPILL_V512_SAVE:
     case AMDGPU::SI_SPILL_V256_SAVE:
+    case AMDGPU::SI_SPILL_V224_SAVE:
     case AMDGPU::SI_SPILL_V192_SAVE:
     case AMDGPU::SI_SPILL_V160_SAVE:
     case AMDGPU::SI_SPILL_V128_SAVE:
@@ -1556,6 +1568,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_A1024_SAVE:
     case AMDGPU::SI_SPILL_A512_SAVE:
     case AMDGPU::SI_SPILL_A256_SAVE:
+    case AMDGPU::SI_SPILL_A224_SAVE:
     case AMDGPU::SI_SPILL_A192_SAVE:
     case AMDGPU::SI_SPILL_A160_SAVE:
     case AMDGPU::SI_SPILL_A128_SAVE:
@@ -1584,6 +1597,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V128_RESTORE:
     case AMDGPU::SI_SPILL_V160_RESTORE:
     case AMDGPU::SI_SPILL_V192_RESTORE:
+    case AMDGPU::SI_SPILL_V224_RESTORE:
     case AMDGPU::SI_SPILL_V256_RESTORE:
     case AMDGPU::SI_SPILL_V512_RESTORE:
     case AMDGPU::SI_SPILL_V1024_RESTORE:
@@ -1593,6 +1607,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_A128_RESTORE:
     case AMDGPU::SI_SPILL_A160_RESTORE:
     case AMDGPU::SI_SPILL_A192_RESTORE:
+    case AMDGPU::SI_SPILL_A224_RESTORE:
     case AMDGPU::SI_SPILL_A256_RESTORE:
     case AMDGPU::SI_SPILL_A512_RESTORE:
     case AMDGPU::SI_SPILL_A1024_RESTORE: {

diff  --git a/llvm/test/CodeGen/AMDGPU/spill224.mir b/llvm/test/CodeGen/AMDGPU/spill224.mir
new file mode 100644
index 000000000000..e8d6a80e84f9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/spill224.mir
@@ -0,0 +1,104 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s
+
+# Make sure spill/restore of 224 bit registers works.
+
+---
+name: spill_restore_sgpr224
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+body: |
+  ; SPILLED-LABEL: name: spill_restore_sgpr224
+  ; SPILLED: bb.0:
+  ; SPILLED:   successors: %bb.1(0x80000000)
+  ; SPILLED:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; SPILLED:   SI_SPILL_S224_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, %stack.0, implicit $exec, implicit $sgpr32 :: (store 28 into %stack.0, align 4, addrspace 5)
+  ; SPILLED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; SPILLED: bb.1:
+  ; SPILLED:   successors: %bb.2(0x80000000)
+  ; SPILLED:   S_NOP 1
+  ; SPILLED: bb.2:
+  ; SPILLED:   $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 = SI_SPILL_S224_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 28 from %stack.0, align 4, addrspace 5)
+  ; SPILLED:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED-LABEL: name: spill_restore_sgpr224
+  ; EXPANDED: bb.0:
+  ; EXPANDED:   successors: %bb.1(0x80000000)
+  ; EXPANDED:   liveins: $vgpr0
+  ; EXPANDED:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 6, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; EXPANDED: bb.1:
+  ; EXPANDED:   successors: %bb.2(0x80000000)
+  ; EXPANDED:   liveins: $vgpr0
+  ; EXPANDED:   S_NOP 1
+  ; EXPANDED: bb.2:
+  ; EXPANDED:   liveins: $vgpr0
+  ; EXPANDED:   $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  ; EXPANDED:   $sgpr5 = V_READLANE_B32 $vgpr0, 1
+  ; EXPANDED:   $sgpr6 = V_READLANE_B32 $vgpr0, 2
+  ; EXPANDED:   $sgpr7 = V_READLANE_B32 $vgpr0, 3
+  ; EXPANDED:   $sgpr8 = V_READLANE_B32 $vgpr0, 4
+  ; EXPANDED:   $sgpr9 = V_READLANE_B32 $vgpr0, 5
+  ; EXPANDED:   $sgpr10 = V_READLANE_B32 $vgpr0, 6
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+  bb.0:
+    S_NOP 0, implicit-def %0:sgpr_224
+    S_CBRANCH_SCC1 implicit undef $scc, %bb.1
+
+  bb.1:
+    S_NOP 1
+
+  bb.2:
+    S_NOP 0, implicit %0
+...
+
+---
+name: spill_restore_vgpr224
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+body: |
+  ; SPILLED-LABEL: name: spill_restore_vgpr224
+  ; SPILLED: bb.0:
+  ; SPILLED:   successors: %bb.1(0x80000000)
+  ; SPILLED:   S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+  ; SPILLED:   SI_SPILL_V224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store 28 into %stack.0, align 4, addrspace 5)
+  ; SPILLED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; SPILLED: bb.1:
+  ; SPILLED:   successors: %bb.2(0x80000000)
+  ; SPILLED:   S_NOP 1
+  ; SPILLED: bb.2:
+  ; SPILLED:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 28 from %stack.0, align 4, addrspace 5)
+  ; SPILLED:   S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+  ; EXPANDED-LABEL: name: spill_restore_vgpr224
+  ; EXPANDED: bb.0:
+  ; EXPANDED:   successors: %bb.1(0x80000000)
+  ; EXPANDED:   S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+  ; EXPANDED:   SI_SPILL_V224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store 28 into %stack.0, align 4, addrspace 5)
+  ; EXPANDED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; EXPANDED: bb.1:
+  ; EXPANDED:   successors: %bb.2(0x80000000)
+  ; EXPANDED:   S_NOP 1
+  ; EXPANDED: bb.2:
+  ; EXPANDED:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 28 from %stack.0, align 4, addrspace 5)
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
+  bb.0:
+    S_NOP 0, implicit-def %0:vreg_224
+    S_CBRANCH_SCC1 implicit undef $scc, %bb.1
+
+  bb.1:
+    S_NOP 1
+
+  bb.2:
+    S_NOP 0, implicit %0
+...


        


More information about the llvm-commits mailing list