[llvm] [AMDGPU] Mark AGPR tuple implicit in the first instr of AGPR spills. (PR #115285)

Pravin Jagtap via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 7 20:56:41 PST 2024


https://github.com/pravinjagtap updated https://github.com/llvm/llvm-project/pull/115285

>From 1c88d198364c98208d4497e8a3aa91330ee76c23 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Thu, 7 Nov 2024 14:58:02 +0530
Subject: [PATCH 1/2] [AMDGPU] Mark AGPR tuple implicit in the first instr of
 AGPR spills.

When AGPRs are spilled to the stack through VGPRs, PEI (prologue/epilogue
insertion) marks the AGPR tuple only as implicit-def. To preserve liveness,
it should also mark the tuple as an implicit use.
---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp              |  2 ++
 .../AMDGPU/av-spill-expansion-with-machine-cp.mir      | 10 +++++-----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 707468892d1779..dc7952ee67f033 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1808,6 +1808,8 @@ void SIRegisterInfo::buildSpillLoadStore(
                            .addReg(SubReg, getKillRegState(IsKill));
         if (NeedSuperRegDef)
           AccRead.addReg(ValueReg, RegState::ImplicitDefine);
+        if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
+          AccRead.addReg(ValueReg, RegState::Implicit);
         AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
       }
       SubReg = TmpIntermediateVGPR;
diff --git a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir
index 460f6d24b9b18e..5bdb24db9accab 100644
--- a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir
+++ b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir
@@ -3,7 +3,6 @@
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 %s -o - -run-pass prologepilog,machine-cp -verify-machineinstrs | FileCheck -check-prefix=GFX908-PEI-MACHINECP %s
 
 # When VGPRs are available for spilling, prologepilog marks the tuple implicit-def as well as implicit in the first spill instruction.
-# As a consequence, machine-cp would NOT delete agpr2 copy here.
 
 ---
 name:  agpr-spill-to-vgpr-machine-cp
@@ -43,8 +42,8 @@ body: |
     S_ENDPGM 0
 ...
 
-# When VGPRs are NOT available for spilling (stack is used), prologepilog marks the tuple implicit-def only and NOT implicit.
-# As a consequence, machine-cp would delete agpr2 copy here. Presently, this is incorrect behavior.
+# When VGPRs are NOT available for spilling (stack is used), prologepilog should also mark the tuple as both implicit-def and implicit (similar to the use case above).
+# As a consequence, machine-cp would not delete the agpr2 copy here.
 
 ---
 name:  agpr-spill-to-vgpr-to-stack-machine-cp
@@ -66,7 +65,7 @@ body: |
     ; GFX908-PEI-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec
     ; GFX908-PEI-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF
     ; GFX908-PEI-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = IMPLICIT_DEF
-    ; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
+    ; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
     ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
     ; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
     ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
@@ -77,9 +76,10 @@ body: |
     ; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
     ; GFX908-PEI-MACHINECP-NEXT: {{  $}}
     ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
+    ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = IMPLICIT_DEF
-    ; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
+    ; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
     ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
     ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)

>From 5d035e6466c2e4d38bc3a8a081328f688b215115 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Fri, 8 Nov 2024 10:25:42 +0530
Subject: [PATCH 2/2] Enabled tests for entry function

---
 .../av-spill-expansion-with-machine-cp.mir     | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir
index 5bdb24db9accab..dfe4b8a33f3967 100644
--- a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir
+++ b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir
@@ -10,6 +10,7 @@ tracksRegLiveness: true
 stack:
   - { id: 0, name: '', type: spill-slot, offset: 0, size: 128, alignment: 4 }
 machineFunctionInfo:
+  isEntryFunction: true
   scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
   stackPtrOffsetReg: '$sgpr32'
   hasSpilledVGPRs: true
@@ -51,6 +52,7 @@ tracksRegLiveness: true
 stack:
   - { id: 0, name: '', type: spill-slot, offset: 0, size: 128, alignment: 4 }
 machineFunctionInfo:
+  isEntryFunction: true
   scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
   stackPtrOffsetReg: '$sgpr32'
   hasSpilledVGPRs: true
@@ -59,30 +61,34 @@ body: |
     successors:
     liveins: $vgpr0, $vgpr1
     ; GFX908-PEI-LABEL: name: agpr-spill-to-vgpr-to-stack-machine-cp
-    ; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+    ; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX908-PEI-NEXT: {{  $}}
+    ; GFX908-PEI-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX908-PEI-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX908-PEI-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
     ; GFX908-PEI-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec
     ; GFX908-PEI-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF
     ; GFX908-PEI-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = IMPLICIT_DEF
     ; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
     ; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-    ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
     ; GFX908-PEI-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
     ; GFX908-PEI-NEXT: S_ENDPGM 0
     ;
     ; GFX908-PEI-MACHINECP-LABEL: name: agpr-spill-to-vgpr-to-stack-machine-cp
-    ; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
+    ; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX908-PEI-MACHINECP-NEXT: {{  $}}
+    ; GFX908-PEI-MACHINECP-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX908-PEI-MACHINECP-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
     ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = IMPLICIT_DEF
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-    ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
     ; GFX908-PEI-MACHINECP-NEXT: S_ENDPGM 0
     renamable $agpr0 = COPY renamable $vgpr0, implicit $exec



More information about the llvm-commits mailing list