[llvm] 4657947 - AMDGPU: Fix spill/restore of 192-bit registers

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 14 10:27:32 PDT 2020


Author: Matt Arsenault
Date: 2020-06-14T13:12:01-04:00
New Revision: 46579471fd2b9c00ba982325f53f30192cc5797f

URL: https://github.com/llvm/llvm-project/commit/46579471fd2b9c00ba982325f53f30192cc5797f
DIFF: https://github.com/llvm/llvm-project/commit/46579471fd2b9c00ba982325f53f30192cc5797f.diff

LOG: AMDGPU: Fix spill/restore of 192-bit registers

I tried to use an IR inline asm test, but that doesn't work because the
inline asm handling asserts when there is no 192-bit MVT to use.

Added: 
    llvm/test/CodeGen/AMDGPU/spill192.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index b72b7eb69f1e..200f13f9c450 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1193,6 +1193,8 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_S128_SAVE;
   case 20:
     return AMDGPU::SI_SPILL_S160_SAVE;
+  case 24:
+    return AMDGPU::SI_SPILL_S192_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_S256_SAVE;
   case 64:
@@ -1216,6 +1218,8 @@ static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_V128_SAVE;
   case 20:
     return AMDGPU::SI_SPILL_V160_SAVE;
+  case 24:
+    return AMDGPU::SI_SPILL_V192_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_V256_SAVE;
   case 64:
@@ -1321,6 +1325,8 @@ static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_S128_RESTORE;
   case 20:
     return AMDGPU::SI_SPILL_S160_RESTORE;
+  case 24:
+    return AMDGPU::SI_SPILL_S192_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_S256_RESTORE;
   case 64:
@@ -1344,6 +1350,8 @@ static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_V128_RESTORE;
   case 20:
     return AMDGPU::SI_SPILL_V160_RESTORE;
+  case 24:
+    return AMDGPU::SI_SPILL_V192_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_V256_RESTORE;
   case 64:

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index fff2189498e1..f5918ee75870 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -662,6 +662,7 @@ defm SI_SPILL_S64  : SI_SPILL_SGPR <SReg_64>;
 defm SI_SPILL_S96  : SI_SPILL_SGPR <SReg_96>;
 defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
 defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
+defm SI_SPILL_S192 : SI_SPILL_SGPR <SReg_192>;
 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
 defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
@@ -701,6 +702,7 @@ defm SI_SPILL_V64  : SI_SPILL_VGPR <VReg_64>;
 defm SI_SPILL_V96  : SI_SPILL_VGPR <VReg_96>;
 defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
 defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
+defm SI_SPILL_V192 : SI_SPILL_VGPR <VReg_192>;
 defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
 defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
 defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>;

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 840dcc1c0cbd..0db2ee7160d9 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -556,6 +556,11 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   case AMDGPU::SI_SPILL_V256_SAVE:
   case AMDGPU::SI_SPILL_V256_RESTORE:
     return 8;
+  case AMDGPU::SI_SPILL_S192_SAVE:
+  case AMDGPU::SI_SPILL_S192_RESTORE:
+  case AMDGPU::SI_SPILL_V192_SAVE:
+  case AMDGPU::SI_SPILL_V192_RESTORE:
+    return 6;
   case AMDGPU::SI_SPILL_S160_SAVE:
   case AMDGPU::SI_SPILL_S160_RESTORE:
   case AMDGPU::SI_SPILL_V160_SAVE:
@@ -1203,6 +1208,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
   case AMDGPU::SI_SPILL_S1024_SAVE:
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S192_SAVE:
   case AMDGPU::SI_SPILL_S160_SAVE:
   case AMDGPU::SI_SPILL_S128_SAVE:
   case AMDGPU::SI_SPILL_S96_SAVE:
@@ -1212,6 +1218,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
   case AMDGPU::SI_SPILL_S1024_RESTORE:
   case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S192_RESTORE:
   case AMDGPU::SI_SPILL_S160_RESTORE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
   case AMDGPU::SI_SPILL_S96_RESTORE:
@@ -1247,6 +1254,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_S1024_SAVE:
     case AMDGPU::SI_SPILL_S512_SAVE:
     case AMDGPU::SI_SPILL_S256_SAVE:
+    case AMDGPU::SI_SPILL_S192_SAVE:
     case AMDGPU::SI_SPILL_S160_SAVE:
     case AMDGPU::SI_SPILL_S128_SAVE:
     case AMDGPU::SI_SPILL_S96_SAVE:
@@ -1260,6 +1268,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_S1024_RESTORE:
     case AMDGPU::SI_SPILL_S512_RESTORE:
     case AMDGPU::SI_SPILL_S256_RESTORE:
+    case AMDGPU::SI_SPILL_S192_RESTORE:
     case AMDGPU::SI_SPILL_S160_RESTORE:
     case AMDGPU::SI_SPILL_S128_RESTORE:
     case AMDGPU::SI_SPILL_S96_RESTORE:

diff  --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir
new file mode 100644
index 000000000000..26f727d034d4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/spill192.mir
@@ -0,0 +1,104 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s
+
+# Make sure spill/restore of 192-bit registers works. We have to
+# settle for a MIR test for now since inline asm fails without a
+# 192-bit MVT.
+
+---
+name: spill_restore_sgpr192
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+body: |
+  ; SPILLED-LABEL: name: spill_restore_sgpr192
+  ; SPILLED: bb.0:
+  ; SPILLED:   successors: %bb.1(0x80000000)
+  ; SPILLED:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; SPILLED:   SI_SPILL_S192_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 24 into %stack.0, align 4, addrspace 5)
+  ; SPILLED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; SPILLED: bb.1:
+  ; SPILLED:   successors: %bb.2(0x80000000)
+  ; SPILLED:   S_NOP 1
+  ; SPILLED: bb.2:
+  ; SPILLED:   $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 = SI_SPILL_S192_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 24 from %stack.0, align 4, addrspace 5)
+  ; SPILLED:   S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; EXPANDED-LABEL: name: spill_restore_sgpr192
+  ; EXPANDED: bb.0:
+  ; EXPANDED:   successors: %bb.1(0x80000000)
+  ; EXPANDED:   liveins: $vgpr0
+  ; EXPANDED:   S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr4, 0, undef $vgpr0
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr5, 1, $vgpr0
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr6, 2, $vgpr0
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr7, 3, $vgpr0
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr8, 4, $vgpr0
+  ; EXPANDED:   $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr9, 5, $vgpr0
+  ; EXPANDED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; EXPANDED: bb.1:
+  ; EXPANDED:   successors: %bb.2(0x80000000)
+  ; EXPANDED:   liveins: $vgpr0
+  ; EXPANDED:   S_NOP 1
+  ; EXPANDED: bb.2:
+  ; EXPANDED:   liveins: $vgpr0
+  ; EXPANDED:   $sgpr4 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; EXPANDED:   $sgpr5 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 1
+  ; EXPANDED:   $sgpr6 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 2
+  ; EXPANDED:   $sgpr7 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 3
+  ; EXPANDED:   $sgpr8 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 4
+  ; EXPANDED:   $sgpr9 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 5
+  ; EXPANDED:   S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  bb.0:
+    S_NOP 0, implicit-def %0:sgpr_192
+    S_CBRANCH_SCC1 implicit undef $scc, %bb.1
+
+  bb.1:
+    S_NOP 1
+
+  bb.2:
+    S_NOP 0, implicit %0
+...
+
+---
+name: spill_restore_vgpr192
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+body: |
+  ; SPILLED-LABEL: name: spill_restore_vgpr192
+  ; SPILLED: bb.0:
+  ; SPILLED:   successors: %bb.1(0x80000000)
+  ; SPILLED:   S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+  ; SPILLED:   SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5)
+  ; SPILLED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; SPILLED: bb.1:
+  ; SPILLED:   successors: %bb.2(0x80000000)
+  ; SPILLED:   S_NOP 1
+  ; SPILLED: bb.2:
+  ; SPILLED:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5)
+  ; SPILLED:   S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+  ; EXPANDED-LABEL: name: spill_restore_vgpr192
+  ; EXPANDED: bb.0:
+  ; EXPANDED:   successors: %bb.1(0x80000000)
+  ; EXPANDED:   S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+  ; EXPANDED:   SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5)
+  ; EXPANDED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; EXPANDED: bb.1:
+  ; EXPANDED:   successors: %bb.2(0x80000000)
+  ; EXPANDED:   S_NOP 1
+  ; EXPANDED: bb.2:
+  ; EXPANDED:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5)
+  ; EXPANDED:   S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+  bb.0:
+    S_NOP 0, implicit-def %0:vreg_192
+    S_CBRANCH_SCC1 implicit undef $scc, %bb.1
+
+  bb.1:
+    S_NOP 1
+
+  bb.2:
+    S_NOP 0, implicit %0
+...


        


More information about the llvm-commits mailing list