[llvm] 81827f8 - [AMDGPU] Support wwm-reg AV spill pseudos

Thu Aug 17 07:34:58 PDT 2023

Author: Christudasan Devadasan
Date: 2023-08-17T20:04:18+05:30
New Revision: 81827f8cfb223b2e0ba654ba6c838418d6cb67e5

URL: https://github.com/llvm/llvm-project/commit/81827f8cfb223b2e0ba654ba6c838418d6cb67e5
DIFF: https://github.com/llvm/llvm-project/commit/81827f8cfb223b2e0ba654ba6c838418d6cb67e5.diff

LOG: [AMDGPU] Support wwm-reg AV spill pseudos

The wwm register spill pseudos are currently defined only for the
VGPR_32 regclass. This causes a verifier error on gfx908 and above,
because the regalloc sometimes restores the values to the vector
superclass AV_32. Fix it by supporting AV wwm-spill pseudos as well.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D155646

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 7a35139da8cbd8..a0508b550881ca 100644

--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1589,11 +1589,15 @@ static unsigned getAVSpillSaveOpcode(unsigned Size) {
   }
 }
 
-static unsigned getWWMRegSpillSaveOpcode(unsigned Size) {
+static unsigned getWWMRegSpillSaveOpcode(unsigned Size,
+                                         bool IsVectorSuperClass) {
   // Currently, there is only 32-bit WWM register spills needed.
   if (Size != 4)
     llvm_unreachable("unknown wwm register spill size");
 
+  if (IsVectorSuperClass)
+    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
+
   return AMDGPU::SI_SPILL_WWM_V32_SAVE;
 }
 
@@ -1602,11 +1606,13 @@ static unsigned getVectorRegSpillSaveOpcode(Register Reg,
                                             unsigned Size,
                                             const SIRegisterInfo &TRI,
                                             const SIMachineFunctionInfo &MFI) {
+  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
+
   // Choose the right opcode if spilling a WWM register.
   if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
-    return getWWMRegSpillSaveOpcode(Size);
+    return getWWMRegSpillSaveOpcode(Size, IsVectorSuperClass);
 
-  if (TRI.isVectorSuperClass(RC))
+  if (IsVectorSuperClass)
     return getAVSpillSaveOpcode(Size);
 
   return TRI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(Size)
@@ -1809,11 +1815,15 @@ static unsigned getAVSpillRestoreOpcode(unsigned Size) {
   }
 }
 
-static unsigned getWWMRegSpillRestoreOpcode(unsigned Size) {
+static unsigned getWWMRegSpillRestoreOpcode(unsigned Size,
+                                            bool IsVectorSuperClass) {
   // Currently, there is only 32-bit WWM register spills needed.
   if (Size != 4)
     llvm_unreachable("unknown wwm register spill size");
 
+  if (IsVectorSuperClass)
+    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
+
   return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
 }
 
@@ -1821,11 +1831,13 @@ static unsigned
 getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
                                unsigned Size, const SIRegisterInfo &TRI,
                                const SIMachineFunctionInfo &MFI) {
+  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
+
   // Choose the right opcode if restoring a WWM register.
   if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
-    return getWWMRegSpillRestoreOpcode(Size);
+    return getWWMRegSpillRestoreOpcode(Size, IsVectorSuperClass);
 
-  if (TRI.isVectorSuperClass(RC))
+  if (IsVectorSuperClass)
     return getAVSpillRestoreOpcode(Size);
 
   return TRI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(Size)

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 3c8b7cf2e1a8ed..e85917a4c0f329 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -670,7 +670,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
 
   static bool isWWMRegSpillOpcode(uint16_t Opcode) {
     return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
-           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE;
+           Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
+           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
+           Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
   }
 
   static bool isDPP(const MachineInstr &MI) {

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b0d7afd4eb7abe..5d239e2da17edf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -952,8 +952,10 @@ defm SI_SPILL_AV384 : SI_SPILL_VGPR <AV_384, 1>;
 defm SI_SPILL_AV512 : SI_SPILL_VGPR <AV_512, 1>;
 defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>;
 
-let isConvergent = 1 in
-defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR <VGPR_32>;
+let isConvergent = 1 in {
+  defm SI_SPILL_WWM_V32  : SI_SPILL_VGPR <VGPR_32>;
+  defm SI_SPILL_WWM_AV32 : SI_SPILL_VGPR <AV_32, 1>;
+}
 
 def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
   (outs SReg_64:$dst),

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index c8704b43e42b8e..b69bea225ac2ef 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1065,6 +1065,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   case AMDGPU::SI_SPILL_AV32_RESTORE:
   case AMDGPU::SI_SPILL_WWM_V32_SAVE:
   case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
+  case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
+  case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
     return 1;
   default: llvm_unreachable("Invalid spill opcode");
   }
@@ -2144,7 +2146,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_AV96_SAVE:
     case AMDGPU::SI_SPILL_AV64_SAVE:
     case AMDGPU::SI_SPILL_AV32_SAVE:
-    case AMDGPU::SI_SPILL_WWM_V32_SAVE: {
+    case AMDGPU::SI_SPILL_WWM_V32_SAVE:
+    case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                          AMDGPU::OpName::vdata);
       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
@@ -2211,7 +2214,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_AV384_RESTORE:
     case AMDGPU::SI_SPILL_AV512_RESTORE:
     case AMDGPU::SI_SPILL_AV1024_RESTORE:
-    case AMDGPU::SI_SPILL_WWM_V32_RESTORE: {
+    case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
+    case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                          AMDGPU::OpName::vdata);
       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==

diff  --git a/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir b/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir
index 9c4b0107b18fc0..bdf9d88bbd8bb7 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir
+++ b/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir
@@ -1,12 +1,5 @@
-# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=si-lower-sgpr-spills -stop-after=greedy,1 -verify-machineinstrs --stress-regalloc=2 %s -o /dev/null 2>&1 | FileCheck %s
-
-# This test would fail as there is no wwm-register spill pseudo instructions supported for the vector superclass (AV).
-# Currently there is only VGPR_32 regclass spilling allowed for wwm-registers.
-
-# CHECK: Bad machine code: Illegal virtual register for instruction
-# CHECK: instruction: {{.*}}    [[AV_REG:%[0-9]+]]:av_32 = SI_SPILL_WWM_V32_RESTORE
-# CHECK-NEXT: - operand 0:   [[AV_REG]]:av_32
-# CHECK-NEXT: Expected a VGPR_32 register, but got a AV_32 register
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=si-lower-sgpr-spills -stop-after=greedy,1 -verify-machineinstrs --stress-regalloc=2 -o - %s | FileCheck -check-prefix GCN-REGALLOC %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=si-lower-sgpr-spills -stop-after=virtregrewriter,1 -verify-machineinstrs --stress-regalloc=2 -o - %s | FileCheck -check-prefix GCN-REWRITER %s
 
 name:            test_wwm_reg_superclass_spill
 tracksRegLiveness: true
@@ -19,6 +12,11 @@ machineFunctionInfo:
   sgprForEXECCopy:   '$sgpr100_sgpr101'
 body:             |
   bb.0:
+    ; GCN-REGALLOC-COUNT-2: %{{[0-9]+}}:av_32 = SI_SPILL_WWM_AV32_RESTORE
+    ; GCN-REGALLOC: S_ENDPGM 0
+    ;
+    ; GCN-REWRITER-COUNT-2: renamable $vgpr0 = SI_SPILL_WWM_AV32_RESTORE
+    ; GCN-REWRITER: S_ENDPGM 0
     $vgpr0 = IMPLICIT_DEF
     $sgpr0_sgpr1 = IMPLICIT_DEF
     %temp0:vgpr_32(s32) = COPY $vgpr0