[llvm] [AMDGPU] Inplace FI elimination during PEI for scalar copy instruction (PR #99556)
Pankaj Dwivedi via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 24 12:55:37 PDT 2024
https://github.com/PankajDwivedi-25 updated https://github.com/llvm/llvm-project/pull/99556
>From 4c43e74e9d3a7cd217b542c8a0eadc1ef83eede3 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Fri, 19 Jul 2024 01:07:54 +0530
Subject: [PATCH 1/8] [AMDGPU] Inplace FI elimination during PEI for scalar
copy instruction
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 70 +++-
llvm/test/CodeGen/AMDGPU/frame-index.mir | 324 ++++++++++++++++++
.../CodeGen/AMDGPU/reduce-frame-index.mir | 176 ++++++++++
3 files changed, 552 insertions(+), 18 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 8a315aa822786..05153383a3a21 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2449,7 +2449,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
? &AMDGPU::SReg_32RegClass
: &AMDGPU::VGPR_32RegClass;
bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
- MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
+ MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
+ MI->getOpcode() == AMDGPU::S_MOV_B32;
Register ResultReg =
IsCopy ? MI->getOperand(0).getReg()
: RS->scavengeRegisterBackwards(*RC, MI, false, 0);
@@ -2458,7 +2459,13 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (Offset == 0) {
unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
: AMDGPU::V_LSHRREV_B32_e64;
- auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg);
+ Register TmpResultReg = ResultReg;
+ if (IsSALU && LiveSCC) {
+ TmpResultReg = RS->scavengeRegisterBackwards(
+ AMDGPU::VGPR_32RegClass, MI, false, 0);
+ }
+
+ auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg);
if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
// For V_LSHRREV, the operands are reversed (the shift count goes
// first).
@@ -2468,11 +2475,13 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (IsSALU && !LiveSCC)
Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
if (IsSALU && LiveSCC) {
- Register NewDest = RS->scavengeRegisterBackwards(
- AMDGPU::SReg_32RegClass, Shift, false, 0);
+ Register NewDest =
+ IsCopy ? ResultReg
+ : RS->scavengeRegisterBackwards(AMDGPU::SReg_32RegClass,
+ Shift, false, 0);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
NewDest)
- .addReg(ResultReg);
+ .addReg(TmpResultReg);
ResultReg = NewDest;
}
} else {
@@ -2523,22 +2532,47 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// We may have 1 free scratch SGPR even though a carry out is
// unavailable. Only one additional mov is needed.
- Register TmpScaledReg = RS->scavengeRegisterBackwards(
- AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
- Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
- .addReg(FrameReg)
- .addImm(ST.getWavefrontSizeLog2());
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
- .addReg(ScaledReg, RegState::Kill)
- .addImm(Offset);
+ Register TmpScaledReg =
+ IsCopy && IsSALU
+ ? ResultReg
+ : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
+ MI, false, 0, false);
+ Register ScaledReg =
+ TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
+ Register TmpResultReg = ScaledReg;
+
+ if (!LiveSCC) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg)
+ .addReg(FrameReg)
+ .addImm(ST.getWavefrontSizeLog2());
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg)
+ .addReg(TmpResultReg, RegState::Kill)
+ .addImm(Offset);
+ } else {
+ TmpResultReg = RS->scavengeRegisterBackwards(
+ AMDGPU::VGPR_32RegClass, MI, false, 0);
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHR_B32_e64),
+ TmpResultReg)
+ .addImm(ST.getWavefrontSizeLog2())
+ .addReg(FrameReg);
+ auto Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32),
+ TmpResultReg);
+ Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
+ Register NewDest =
+ IsCopy ? ResultReg
+ : RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_32RegClass, Add, false, 0);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ NewDest)
+ .addReg(TmpResultReg);
+ ResultReg = NewDest;
+ }
if (!IsSALU)
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
- .addReg(ScaledReg, RegState::Kill);
+ .addReg(TmpResultReg, RegState::Kill);
else
- ResultReg = ScaledReg;
-
+ ResultReg = TmpResultReg;
// If there were truly no free SGPRs, we need to undo everything.
if (!TmpScaledReg.isValid()) {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index d8736c5276a26..f4476a497f754 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -211,3 +211,327 @@ body: |
renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
...
+---
+name: materialize_fi_s_mov_b32_offset_0_dead_scc
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_dead_scc
+ ; GCN: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4
+ renamable $sgpr4 = S_MOV_B32 %stack.0
+ S_ENDPGM 0, implicit $sgpr4
+
+...
+
+---
+name: materialize_fi_s_mov_b32_offset_0_live_scc
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr4, $sgpr5
+
+ ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc
+ ; GCN: liveins: $sgpr4, $sgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ renamable $sgpr4 = S_MOV_B32 %stack.0
+ S_ENDPGM 0, implicit $sgpr4, implicit $scc
+
+...
+
+# FI#0 is filler to get a non-0 offset for FI#1
+---
+name: materialize_fi_s_mov_b32_offset_64_dead_scc
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: default, size: 64, alignment: 16, stack-id: default }
+ - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_64_dead_scc
+ ; GCN: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+ ; GCN-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 64, implicit-def $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4
+ renamable $sgpr4 = S_MOV_B32 %stack.1
+ S_ENDPGM 0, implicit $sgpr4
+
+...
+
+---
+name: materialize_fi_s_mov_b32_offset_68_dead_scc
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: default, size: 68, alignment: 16, stack-id: default }
+ - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_68_dead_scc
+ ; GCN: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+ ; GCN-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc
+ ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4
+ renamable $sgpr4 = S_MOV_B32 %stack.1
+ S_ENDPGM 0, implicit $sgpr4
+
+...
+
+---
+name: materialize_fi_s_mov_b32_offset_64_live_scc
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: default, size: 64, alignment: 16, stack-id: default }
+ - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr4, $sgpr5
+
+ ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc
+ ; GCN: liveins: $sgpr4, $sgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ renamable $sgpr4 = S_MOV_B32 %stack.1
+ S_ENDPGM 0, implicit $sgpr4, implicit $scc
+
+...
+
+---
+name: materialize_fi_s_mov_b32_offset_68_live_scc
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: default, size: 68, alignment: 16, stack-id: default }
+ - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr4, $sgpr5
+
+ ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc
+ ; GCN: liveins: $sgpr4, $sgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec
+ ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ renamable $sgpr4 = S_MOV_B32 %stack.1
+ S_ENDPGM 0, implicit $sgpr4, implicit $scc
+
+...
+
+# FIXME: This is finding a VGPR
+---
+name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: default, size: 4, alignment: 16, stack-id: default }
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 10
+
+body: |
+ bb.0:
+ liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+
+ ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
+ ; GCN: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+ ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+ ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+ ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+ ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+ ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+
+ renamable $sgpr4 = S_MOV_B32 %stack.0
+
+ S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ S_ENDPGM 0, implicit $sgpr4, implicit $scc
+
+...
+
+# FIXME: This is clobbering scc
+---
+name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: default, size: 64, alignment: 16, stack-id: default }
+ - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr4, $sgpr5
+
+ ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
+ ; GCN: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+ ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
+ ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+ ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+ ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+ ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+ ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+ ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+
+ renamable $sgpr4 = S_MOV_B32 %stack.1
+
+ S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ S_ENDPGM 0, implicit $sgpr4, implicit $scc
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
new file mode 100644
index 0000000000000..74e8b3b47fdb9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
@@ -0,0 +1,176 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
+--- |
+ target triple = "amdgcn-amd-amdhsa"
+
+ declare double @killer()
+ define void @bug() #0 {
+ ret void
+ }
+
+ attributes #0 = { "amdgpu-stack-objects" "target-cpu"="gfx90a" }
+
+...
+---
+name: bug
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 8
+ adjustsStack: true
+ hasCalls: true
+stack:
+ - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 0, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 1, name: '', type: default, offset: 4, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 4, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 2, name: '', type: default, offset: 8, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 8, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 3, name: '', type: default, offset: 12, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 12, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 4, name: '', type: default, offset: 16, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 16, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 5, name: '', type: default, offset: 20, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 20, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 6, name: '', type: default, offset: 24, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 24, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 7, name: '', type: spill-slot, offset: 32, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 8, name: '', type: spill-slot, offset: 36, size: 8, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 9, name: '', type: spill-slot, offset: 44, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 10, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
+ stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 11, name: '', type: spill-slot, offset: 48, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 12, name: '', type: default, offset: 52, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+ isEntryFunction: false
+ isChainFunction: false
+ memoryBound: false
+ waveLimiter: false
+ hasSpilledSGPRs: true
+ hasSpilledVGPRs: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ bytesInStackArgArea: 0
+ returnsVoid: true
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ dispatchPtr: { reg: '$sgpr4_sgpr5' }
+ queuePtr: { reg: '$sgpr6_sgpr7' }
+ dispatchID: { reg: '$sgpr10_sgpr11' }
+ workGroupIDX: { reg: '$sgpr12' }
+ workGroupIDY: { reg: '$sgpr13' }
+ workGroupIDZ: { reg: '$sgpr14' }
+ LDSKernelId: { reg: '$sgpr15' }
+ implicitArgPtr: { reg: '$sgpr8_sgpr9' }
+ workItemIDX: { reg: '$vgpr31', mask: 1023 }
+ workItemIDY: { reg: '$vgpr31', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
+ psInputAddr: 0
+ psInputEnable: 0
+ mode:
+ ieee: true
+ dx10-clamp: true
+ fp32-input-denormals: true
+ fp32-output-denormals: true
+ fp64-fp16-input-denormals: true
+ fp64-fp16-output-denormals: true
+ highBitsOf32BitAddress: 0
+ occupancy: 8
+ wwmReservedRegs:
+ - '$vgpr63'
+ - '$vgpr40'
+ scavengeFI: '%stack.12'
+ vgprForAGPRCopy: ''
+ sgprForEXECCopy: ''
+ longBranchReservedReg: ''
+body: |
+ bb.0:
+ liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: bug
+ ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr41, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr16 = COPY $sgpr33
+ ; CHECK-NEXT: $sgpr33 = frame-setup COPY $sgpr32
+ ; CHECK-NEXT: $sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 56, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 60, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr18_sgpr19
+ ; CHECK-NEXT: $vgpr41 = SI_SPILL_S32_TO_VGPR $sgpr16, 0, undef $vgpr41
+ ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 5120, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
+ ; CHECK-NEXT: renamable $sgpr17 = S_MOV_B32 0
+ ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
+ ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr33, 6, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
+ ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 8, implicit-def $scc
+ ; CHECK-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
+ ; CHECK-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
+ ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 24, implicit-def $scc
+ ; CHECK-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr26_sgpr27, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr24_sgpr25, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr41, 0
+ ; CHECK-NEXT: $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 56, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+ ; CHECK-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 60, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+ ; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+ ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
+ ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -5120, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr33 = COPY $sgpr4
+ ; CHECK-NEXT: SI_RETURN
+ renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
+ renamable $sgpr17 = S_MOV_B32 0
+ undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
+ renamable $sgpr24 = S_MOV_B32 %stack.0
+ renamable $sgpr29 = COPY undef renamable $sgpr30
+ renamable $sgpr20 = S_MOV_B32 %stack.2
+ undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
+ undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
+ renamable $sgpr31 = S_MOV_B32 %stack.6
+ renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr26_sgpr27, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr24_sgpr25, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
+ dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ SI_RETURN
+
+...
>From 7b80c8bd248342f6908ae063a263ed50af7b1020 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Fri, 19 Jul 2024 14:02:37 +0530
Subject: [PATCH 2/8] refactored SIRegisterInfo, reduce-frame-index
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 1 -
.../CodeGen/AMDGPU/reduce-frame-index.mir | 116 ++----------------
2 files changed, 11 insertions(+), 106 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 05153383a3a21..b08558ae073a2 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2551,7 +2551,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
} else {
TmpResultReg = RS->scavengeRegisterBackwards(
AMDGPU::VGPR_32RegClass, MI, false, 0);
-
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHR_B32_e64),
TmpResultReg)
.addImm(ST.getWavefrontSizeLog2())
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
index 74e8b3b47fdb9..0661bf2778bb7 100644
--- a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
@@ -14,127 +14,41 @@
---
name: bug
tracksRegLiveness: true
-frameInfo:
- maxAlignment: 8
- adjustsStack: true
- hasCalls: true
stack:
- { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
local-offset: 0, debug-info-variable: '', debug-info-expression: '',
debug-info-location: '' }
- - { id: 1, name: '', type: default, offset: 4, size: 4, alignment: 4,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- local-offset: 4, debug-info-variable: '', debug-info-expression: '',
- debug-info-location: '' }
- - { id: 2, name: '', type: default, offset: 8, size: 4, alignment: 4,
+ - { id: 1, name: '', type: default, offset: 8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
local-offset: 8, debug-info-variable: '', debug-info-expression: '',
debug-info-location: '' }
- - { id: 3, name: '', type: default, offset: 12, size: 4, alignment: 4,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- local-offset: 12, debug-info-variable: '', debug-info-expression: '',
- debug-info-location: '' }
- - { id: 4, name: '', type: default, offset: 16, size: 4, alignment: 4,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- local-offset: 16, debug-info-variable: '', debug-info-expression: '',
- debug-info-location: '' }
- - { id: 5, name: '', type: default, offset: 20, size: 4, alignment: 4,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- local-offset: 20, debug-info-variable: '', debug-info-expression: '',
- debug-info-location: '' }
- - { id: 6, name: '', type: default, offset: 24, size: 8, alignment: 8,
+ - { id: 2, name: '', type: default, offset: 24, size: 8, alignment: 8,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
local-offset: 24, debug-info-variable: '', debug-info-expression: '',
debug-info-location: '' }
- - { id: 7, name: '', type: spill-slot, offset: 32, size: 4, alignment: 4,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 8, name: '', type: spill-slot, offset: 36, size: 8, alignment: 4,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 9, name: '', type: spill-slot, offset: 44, size: 4, alignment: 4,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 10, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
- stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 11, name: '', type: spill-slot, offset: 48, size: 4, alignment: 4,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- - { id: 12, name: '', type: default, offset: 52, size: 4, alignment: 4,
- stack-id: default, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
machineFunctionInfo:
- isEntryFunction: false
- isChainFunction: false
- memoryBound: false
- waveLimiter: false
- hasSpilledSGPRs: true
- hasSpilledVGPRs: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
frameOffsetReg: '$sgpr33'
stackPtrOffsetReg: '$sgpr32'
- bytesInStackArgArea: 0
- returnsVoid: true
- argumentInfo:
- privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
- dispatchPtr: { reg: '$sgpr4_sgpr5' }
- queuePtr: { reg: '$sgpr6_sgpr7' }
- dispatchID: { reg: '$sgpr10_sgpr11' }
- workGroupIDX: { reg: '$sgpr12' }
- workGroupIDY: { reg: '$sgpr13' }
- workGroupIDZ: { reg: '$sgpr14' }
- LDSKernelId: { reg: '$sgpr15' }
- implicitArgPtr: { reg: '$sgpr8_sgpr9' }
- workItemIDX: { reg: '$vgpr31', mask: 1023 }
- workItemIDY: { reg: '$vgpr31', mask: 1047552 }
- workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
- psInputAddr: 0
- psInputEnable: 0
- mode:
- ieee: true
- dx10-clamp: true
- fp32-input-denormals: true
- fp32-output-denormals: true
- fp64-fp16-input-denormals: true
- fp64-fp16-output-denormals: true
- highBitsOf32BitAddress: 0
- occupancy: 8
- wwmReservedRegs:
- - '$vgpr63'
- - '$vgpr40'
- scavengeFI: '%stack.12'
- vgprForAGPRCopy: ''
- sgprForEXECCopy: ''
- longBranchReservedReg: ''
body: |
bb.0:
liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-
; CHECK-LABEL: name: bug
- ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr41, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $sgpr16 = COPY $sgpr33
- ; CHECK-NEXT: $sgpr33 = frame-setup COPY $sgpr32
- ; CHECK-NEXT: $sgpr18_sgpr19 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 56, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 60, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
- ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr18_sgpr19
- ; CHECK-NEXT: $vgpr41 = SI_SPILL_S32_TO_VGPR $sgpr16, 0, undef $vgpr41
- ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 5120, implicit-def dead $scc
; CHECK-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
; CHECK-NEXT: renamable $sgpr17 = S_MOV_B32 0
; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
- ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr33, 6, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
; CHECK-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
- ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 8, implicit-def $scc
+ ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 4, implicit-def $scc
; CHECK-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
; CHECK-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
- ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 24, implicit-def $scc
+ ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 8, implicit-def $scc
; CHECK-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
@@ -144,24 +58,16 @@ body: |
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
- ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr41, 0
- ; CHECK-NEXT: $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 56, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
- ; CHECK-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 60, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
- ; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
- ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -5120, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr33 = COPY $sgpr4
; CHECK-NEXT: SI_RETURN
renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
renamable $sgpr17 = S_MOV_B32 0
undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
renamable $sgpr24 = S_MOV_B32 %stack.0
renamable $sgpr29 = COPY undef renamable $sgpr30
- renamable $sgpr20 = S_MOV_B32 %stack.2
+ renamable $sgpr20 = S_MOV_B32 %stack.1
undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
- renamable $sgpr31 = S_MOV_B32 %stack.6
+ renamable $sgpr31 = S_MOV_B32 %stack.2
renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
>From 126bd3078ec68c9b1abcc99d0bb61048d89c9011 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Fri, 19 Jul 2024 19:00:09 +0530
Subject: [PATCH 3/8] refactored,checks with gfx90a
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 6 +-
llvm/test/CodeGen/AMDGPU/frame-index.mir | 147 +++++++++---------
.../CodeGen/AMDGPU/reduce-frame-index.mir | 26 +---
3 files changed, 84 insertions(+), 95 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b08558ae073a2..2f210f4dde7b5 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2536,7 +2536,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
IsCopy && IsSALU
? ResultReg
: RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
- MI, false, 0, false);
+ MI, false, 0, /*AllowSpill=*/false);
Register ScaledReg =
TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
Register TmpResultReg = ScaledReg;
@@ -2550,7 +2550,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addImm(Offset);
} else {
TmpResultReg = RS->scavengeRegisterBackwards(
- AMDGPU::VGPR_32RegClass, MI, false, 0);
+ AMDGPU::VGPR_32RegClass, MI, false, 0, /*AllowSpill=*/true);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHR_B32_e64),
TmpResultReg)
.addImm(ST.getWavefrontSizeLog2())
@@ -2561,7 +2561,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Register NewDest =
IsCopy ? ResultReg
: RS->scavengeRegisterBackwards(
- AMDGPU::SReg_32RegClass, Add, false, 0);
+ AMDGPU::SReg_32RegClass, Add, false, 0, /*AllowSpill=*/true);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
NewDest)
.addReg(TmpResultReg);
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index f4476a497f754..ccc14d03ea9a5 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
---
name: func_add_constant_to_fi_divergent_i32
@@ -359,24 +359,24 @@ body: |
liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
- ; GCN: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GCN: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
+ ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
+ ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
+ ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
+ ; GCN-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
+ ; GCN-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
+ ; GCN-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
+ ; GCN-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
+ ; GCN-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
+ ; GCN-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
+ ; GCN-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
+ ; GCN-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
+ ; GCN-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
+ ; GCN-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
+ ; GCN-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
+ ; GCN-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -386,11 +386,10 @@ body: |
; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
- ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
- ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+ ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
- ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+ ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -399,22 +398,22 @@ body: |
; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
- ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
- ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
- ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
- ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
- ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
- ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
- ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
- ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
- ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
- ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
- ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
- ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
- ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
- ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
- ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
- ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
+ ; GCN-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
+ ; GCN-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
+ ; GCN-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
+ ; GCN-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
+ ; GCN-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
+ ; GCN-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
+ ; GCN-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
+ ; GCN-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
+ ; GCN-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
+ ; GCN-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
+ ; GCN-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
+ ; GCN-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
+ ; GCN-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
+ ; GCN-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
+ ; GCN-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
@@ -455,24 +454,24 @@ body: |
liveins: $sgpr4, $sgpr5
; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
- ; GCN: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GCN: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+ ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
+ ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
+ ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
+ ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
+ ; GCN-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
+ ; GCN-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
+ ; GCN-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
+ ; GCN-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
+ ; GCN-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
+ ; GCN-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
+ ; GCN-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
+ ; GCN-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
+ ; GCN-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
+ ; GCN-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
+ ; GCN-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
+ ; GCN-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -482,11 +481,11 @@ body: |
; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
- ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
+ ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
- ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+ ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -495,22 +494,22 @@ body: |
; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
- ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
- ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
- ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
- ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
- ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
- ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
- ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
- ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
- ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
- ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
- ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
- ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
- ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
- ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
- ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
- ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
+ ; GCN-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
+ ; GCN-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
+ ; GCN-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
+ ; GCN-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
+ ; GCN-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
+ ; GCN-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
+ ; GCN-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
+ ; GCN-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
+ ; GCN-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
+ ; GCN-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
+ ; GCN-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
+ ; GCN-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
+ ; GCN-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
+ ; GCN-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
+ ; GCN-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
index 0661bf2778bb7..a68455cb24ffe 100644
--- a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
@@ -1,18 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
---- |
- target triple = "amdgcn-amd-amdhsa"
- declare double @killer()
- define void @bug() #0 {
- ret void
- }
-
- attributes #0 = { "amdgpu-stack-objects" "target-cpu"="gfx90a" }
-
-...
---
-name: bug
+name: s_copy_frame_index_elimination_failure_pei
tracksRegLiveness: true
stack:
- { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
@@ -35,19 +25,19 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; CHECK-LABEL: name: bug
+ ; CHECK-LABEL: name: s_copy_frame_index_elimination_failure_pei
; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
; CHECK-NEXT: renamable $sgpr17 = S_MOV_B32 0
- ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
- ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc_lo
+ ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
; CHECK-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
- ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 4, implicit-def $scc
; CHECK-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
; CHECK-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
- ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 8, implicit-def $scc
; CHECK-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
@@ -57,7 +47,7 @@ body: |
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
- ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
; CHECK-NEXT: SI_RETURN
renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
renamable $sgpr17 = S_MOV_B32 0
@@ -76,7 +66,7 @@ body: |
renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
- dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
SI_RETURN
...
>From eb1e3031c994e4d65e8b01ba436ff6b55c475388 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Fri, 19 Jul 2024 19:04:47 +0530
Subject: [PATCH 4/8] refactored,checks with gfx803
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 18 +--
llvm/test/CodeGen/AMDGPU/frame-index.mir | 147 +++++++++++-----------
2 files changed, 83 insertions(+), 82 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 2f210f4dde7b5..c94595f3ddd20 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2532,11 +2532,11 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// We may have 1 free scratch SGPR even though a carry out is
// unavailable. Only one additional mov is needed.
- Register TmpScaledReg =
- IsCopy && IsSALU
- ? ResultReg
- : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
- MI, false, 0, /*AllowSpill=*/false);
+ Register TmpScaledReg = IsCopy && IsSALU
+ ? ResultReg
+ : RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_32_XM0RegClass, MI,
+ false, 0, /*AllowSpill=*/false);
Register ScaledReg =
TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
Register TmpResultReg = ScaledReg;
@@ -2558,10 +2558,10 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
auto Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32),
TmpResultReg);
Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
- Register NewDest =
- IsCopy ? ResultReg
- : RS->scavengeRegisterBackwards(
- AMDGPU::SReg_32RegClass, Add, false, 0, /*AllowSpill=*/true);
+ Register NewDest = IsCopy ? ResultReg
+ : RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_32RegClass, Add,
+ false, 0, /*AllowSpill=*/true);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
NewDest)
.addReg(TmpResultReg);
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index ccc14d03ea9a5..a0301ed267eed 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
---
name: func_add_constant_to_fi_divergent_i32
@@ -359,24 +359,24 @@ body: |
liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
- ; GCN: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GCN: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
- ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
- ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
- ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
- ; GCN-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
- ; GCN-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
- ; GCN-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
- ; GCN-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
- ; GCN-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
- ; GCN-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
- ; GCN-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
- ; GCN-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
- ; GCN-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
- ; GCN-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
- ; GCN-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
- ; GCN-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -386,10 +386,11 @@ body: |
; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
- ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+ ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
- ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+ ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -398,22 +399,22 @@ body: |
; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
- ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
- ; GCN-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
- ; GCN-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
- ; GCN-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
- ; GCN-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
- ; GCN-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
- ; GCN-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
- ; GCN-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
- ; GCN-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
- ; GCN-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
- ; GCN-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
- ; GCN-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
- ; GCN-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
- ; GCN-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
- ; GCN-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
- ; GCN-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+ ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+ ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+ ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+ ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+ ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
@@ -454,24 +455,24 @@ body: |
liveins: $sgpr4, $sgpr5
; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
- ; GCN: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GCN: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
- ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
- ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
- ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
- ; GCN-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
- ; GCN-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
- ; GCN-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
- ; GCN-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
- ; GCN-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
- ; GCN-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
- ; GCN-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
- ; GCN-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
- ; GCN-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
- ; GCN-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
- ; GCN-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
- ; GCN-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -481,11 +482,11 @@ body: |
; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
- ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
- ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+ ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -494,22 +495,22 @@ body: |
; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
- ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
- ; GCN-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
- ; GCN-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
- ; GCN-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
- ; GCN-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
- ; GCN-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
- ; GCN-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
- ; GCN-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
- ; GCN-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
- ; GCN-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
- ; GCN-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
- ; GCN-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
- ; GCN-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
- ; GCN-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
- ; GCN-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
- ; GCN-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+ ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+ ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+ ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+ ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+ ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+ ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
>From aa340100ec3d8022d1422da9f394dab5c0959451 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Fri, 19 Jul 2024 19:53:21 +0530
Subject: [PATCH 5/8] [WIP] updated check for gfx8,gfx9
---
llvm/test/CodeGen/AMDGPU/frame-index.mir | 471 +++++++++++++-----
.../CodeGen/AMDGPU/reduce-frame-index.mir | 8 +-
2 files changed, 358 insertions(+), 121 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index a0301ed267eed..03b972fb2b136 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -1,5 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=GFX8,GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=GFX900,GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=GFX90A,GCN %s
---
name: func_add_constant_to_fi_divergent_i32
@@ -358,64 +360,181 @@ body: |
bb.0:
liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
- ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
- ; GCN: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
- ; GCN-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
- ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
- ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
- ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
- ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
- ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
- ; GCN-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
- ; GCN-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
- ; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
- ; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
- ; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
- ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
- ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
- ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
- ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
- ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
- ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
- ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
- ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
- ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
- ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
- ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
- ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
- ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
- ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
- ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
- ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
+ ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+ ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+ ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+ ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+ ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+ ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ ;
+ ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
+ ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+ ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX900-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+ ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+ ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+ ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+ ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ ;
+ ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
+ ; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
+ ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
+ ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
+ ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
+ ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
+ ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
+ ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
+ ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
+ ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
+ ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
+ ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
+ ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
+ ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
+ ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
+ ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
+ ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+ ; GFX90A-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX90A-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
+ ; GFX90A-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
+ ; GFX90A-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
+ ; GFX90A-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
+ ; GFX90A-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
+ ; GFX90A-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
+ ; GFX90A-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
+ ; GFX90A-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
+ ; GFX90A-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
+ ; GFX90A-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
+ ; GFX90A-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
+ ; GFX90A-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
+ ; GFX90A-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
+ ; GFX90A-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
+ ; GFX90A-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
+ ; GFX90A-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -454,64 +573,182 @@ body: |
bb.0:
liveins: $sgpr4, $sgpr5
- ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
- ; GCN: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
- ; GCN-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
- ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
- ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
- ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
- ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
- ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
- ; GCN-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
- ; GCN-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
- ; GCN-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
- ; GCN-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
- ; GCN-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
- ; GCN-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
- ; GCN-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
- ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
- ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
- ; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
- ; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
- ; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
- ; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
- ; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
- ; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
- ; GCN-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
- ; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
- ; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
- ; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
- ; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
- ; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
- ; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
- ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
- ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
+ ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+ ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+ ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+ ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+ ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+ ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+ ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ ;
+ ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
+ ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+ ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
+ ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX900-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+ ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+ ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+ ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+ ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+ ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ ;
+ ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
+ ; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
+ ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
+ ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
+ ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
+ ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
+ ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
+ ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
+ ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
+ ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
+ ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
+ ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
+ ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
+ ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
+ ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
+ ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
+ ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+ ; GFX90A-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX90A-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
+ ; GFX90A-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
+ ; GFX90A-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
+ ; GFX90A-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
+ ; GFX90A-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
+ ; GFX90A-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
+ ; GFX90A-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
+ ; GFX90A-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
+ ; GFX90A-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
+ ; GFX90A-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
+ ; GFX90A-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
+ ; GFX90A-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
+ ; GFX90A-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
+ ; GFX90A-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
+ ; GFX90A-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
+ ; GFX90A-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
index a68455cb24ffe..6ae876fd21811 100644
--- a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
@@ -30,14 +30,14 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
; CHECK-NEXT: renamable $sgpr17 = S_MOV_B32 0
- ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc_lo
- ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc
+ ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
+ ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
; CHECK-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
- ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
+ ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 4, implicit-def $scc
; CHECK-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
; CHECK-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
- ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc
+ ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 8, implicit-def $scc
; CHECK-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
>From ab19c1ecbf4a5efe50cb92f1d5b6442d4186d79b Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Wed, 24 Jul 2024 15:44:02 +0530
Subject: [PATCH 6/8] [AMDGPU] this patch fixes all cases
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 29 +-
llvm/test/CodeGen/AMDGPU/frame-index.mir | 76 ++-
.../CodeGen/AMDGPU/reduce-frame-index.mir | 554 +++++++++++++++++-
3 files changed, 611 insertions(+), 48 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index c94595f3ddd20..27485be56b8de 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2555,12 +2555,33 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
TmpResultReg)
.addImm(ST.getWavefrontSizeLog2())
.addReg(FrameReg);
- auto Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32),
- TmpResultReg);
- Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
+
+ MachineInstrBuilder Add;
+ if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS)) ==
+ nullptr) {
+ // VCC is live and no SGPR is free.
+ // since emergency stack slot is already used for spilling VGPR
+ // scavenged? This a way around to avoid carry, need follow-up.
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ResultReg)
+ .addImm(Offset);
+ Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_I32_I24_e64),
+ TmpResultReg)
+ .addReg(TmpResultReg, RegState::Kill)
+ .addImm(1)
+ .addReg(ResultReg, RegState::Kill)
+ .addImm(0);
+ } else if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
+ BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32), ResultReg)
+ .addImm(Offset);
+ Add.addReg(ResultReg, RegState::Kill)
+ .addReg(TmpResultReg, RegState::Kill)
+ .addImm(0);
+ } else
+ Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
+
Register NewDest = IsCopy ? ResultReg
: RS->scavengeRegisterBackwards(
- AMDGPU::SReg_32RegClass, Add,
+ AMDGPU::SReg_32RegClass, *Add,
false, 0, /*AllowSpill=*/true);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
NewDest)
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index 03b972fb2b136..a41eb26db899f 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -305,14 +305,33 @@ body: |
bb.0:
liveins: $sgpr4, $sgpr5
- ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc
- ; GCN: liveins: $sgpr4, $sgpr5
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
- ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
- ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc
+ ; GFX8: liveins: $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64
+ ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ ;
+ ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc
+ ; GFX900: liveins: $sgpr4, $sgpr5
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ ;
+ ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc
+ ; GFX90A: liveins: $sgpr4, $sgpr5
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX90A-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
renamable $sgpr4 = S_MOV_B32 %stack.1
S_ENDPGM 0, implicit $sgpr4, implicit $scc
@@ -331,14 +350,33 @@ body: |
bb.0:
liveins: $sgpr4, $sgpr5
- ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc
- ; GCN: liveins: $sgpr4, $sgpr5
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GCN-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
- ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec
- ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc
+ ; GFX8: liveins: $sgpr4, $sgpr5
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = S_MOV_B32 68
+ ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ ;
+ ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc
+ ; GFX900: liveins: $sgpr4, $sgpr5
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec
+ ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
+ ;
+ ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc
+ ; GFX90A: liveins: $sgpr4, $sgpr5
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX90A-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec
+ ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
renamable $sgpr4 = S_MOV_B32 %stack.1
S_ENDPGM 0, implicit $sgpr4, implicit $scc
@@ -390,7 +428,8 @@ body: |
; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
- ; GFX8-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64
+ ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
@@ -603,7 +642,8 @@ body: |
; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
- ; GFX8-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
+ ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
index 6ae876fd21811..4f08011d7b009 100644
--- a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
@@ -1,5 +1,7 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=GFX8,GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=GFX900,GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=GFX90A,GCN %s
---
name: s_copy_frame_index_elimination_failure_pei
@@ -25,30 +27,30 @@ machineFunctionInfo:
body: |
bb.0:
liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; CHECK-LABEL: name: s_copy_frame_index_elimination_failure_pei
- ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
- ; CHECK-NEXT: renamable $sgpr17 = S_MOV_B32 0
- ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
- ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
- ; CHECK-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
- ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 4, implicit-def $scc
- ; CHECK-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
- ; CHECK-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
- ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 8, implicit-def $scc
- ; CHECK-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
- ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
- ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
- ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr26_sgpr27, implicit $exec
- ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr24_sgpr25, implicit $exec
- ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
- ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
- ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
- ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
- ; CHECK-NEXT: SI_RETURN
+ ; GCN-LABEL: name: s_copy_frame_index_elimination_failure_pei
+ ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
+ ; GCN-NEXT: renamable $sgpr17 = S_MOV_B32 0
+ ; GCN-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
+ ; GCN-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; GCN-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
+ ; GCN-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+ ; GCN-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 4, implicit-def $scc
+ ; GCN-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
+ ; GCN-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
+ ; GCN-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+ ; GCN-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 8, implicit-def $scc
+ ; GCN-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr26_sgpr27, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr24_sgpr25, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
+ ; GCN-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
+ ; GCN-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: SI_RETURN
renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
renamable $sgpr17 = S_MOV_B32 0
undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
@@ -70,3 +72,503 @@ body: |
SI_RETURN
...
+
+---
+name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: default, size: 64, alignment: 16, stack-id: default }
+ - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr4, $sgpr5, $vgpr0
+
+ ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc
+ ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64
+ ; GFX8-NEXT: $vgpr0, dead $sgpr0_sgpr1 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+ ;
+ ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc
+ ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+ ;
+ ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc
+ ; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX90A-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+ V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ renamable $sgpr4 = S_MOV_B32 %stack.1
+ S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+
+...
+
+
+---
+name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: default, size: 64, alignment: 16, stack-id: default }
+ - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr4, $sgpr5, $vgpr0
+
+
+ ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
+ ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+ ; GFX8-NEXT: $vgpr1 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
+ ; GFX8-NEXT: $vgpr1, dead $sgpr6_sgpr7 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+ ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+ ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+ ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+ ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+ ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+ ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+ ;
+ ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
+ ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX900-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+ ; GFX900-NEXT: $vgpr1 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+ ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+ ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+ ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+ ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+ ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+ ;
+ ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
+ ; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
+ ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
+ ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec
+ ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec
+ ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec
+ ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec
+ ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec
+ ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec
+ ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec
+ ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec
+ ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec
+ ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec
+ ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec
+ ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec
+ ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec
+ ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX90A-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+ ; GFX90A-NEXT: $vgpr1 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 64, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ ; GFX90A-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec
+ ; GFX90A-NEXT: $vgpr62 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec
+ ; GFX90A-NEXT: $vgpr61 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec
+ ; GFX90A-NEXT: $vgpr60 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec
+ ; GFX90A-NEXT: $vgpr59 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec
+ ; GFX90A-NEXT: $vgpr58 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec
+ ; GFX90A-NEXT: $vgpr57 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec
+ ; GFX90A-NEXT: $vgpr56 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec
+ ; GFX90A-NEXT: $vgpr47 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec
+ ; GFX90A-NEXT: $vgpr46 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec
+ ; GFX90A-NEXT: $vgpr45 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
+ ; GFX90A-NEXT: $vgpr44 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
+ ; GFX90A-NEXT: $vgpr43 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
+ ; GFX90A-NEXT: $vgpr42 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
+ ; GFX90A-NEXT: $vgpr41 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
+ ; GFX90A-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+ ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+ S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit-def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+
+ V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ renamable $sgpr4 = S_MOV_B32 %stack.1
+
+ S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+ S_NOP 0, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+ S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
+ S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
+ S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
+ S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
+ S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+...
+
+
+---
+name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_sgpr
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: default, size: 64, alignment: 16, stack-id: default }
+ - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr4, $sgpr5, $vgpr0
+
+
+ ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_sgpr
+ ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64
+ ; GFX8-NEXT: $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr0, 1, killed $sgpr4, 0, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+ ;
+ ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_sgpr
+ ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ ; GFX900-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+ ;
+ ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_sgpr
+ ; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ ; GFX90A-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX90A-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
+ ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+ S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+
+ V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ renamable $sgpr4 = S_MOV_B32 %stack.1
+
+ S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+
+...
+
+---
+name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: default, size: 68, alignment: 16, stack-id: default }
+ - { id: 1, type: default, size: 4, alignment: 4, stack-id: default }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr4, $sgpr5, $vgpr0
+
+ ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr
+ ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = S_MOV_B32 68
+ ; GFX8-NEXT: $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr0, 1, killed $sgpr4, 0, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; GFX8-NEXT: S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+ ;
+ ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr
+ ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ ; GFX900-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX900-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec
+ ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; GFX900-NEXT: S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+ ;
+ ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr
+ ; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ ; GFX90A-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ ; GFX90A-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec
+ ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; GFX90A-NEXT: S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ ; GFX90A-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+ S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ S_NOP 0, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ S_NOP 0, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ S_NOP 0, implicit-def $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ S_NOP 0, implicit-def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+
+ V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+ S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
+ renamable $sgpr4 = S_MOV_B32 %stack.1
+
+ S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
+ S_NOP 0, implicit $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ S_NOP 0, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39
+ S_NOP 0, implicit $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ S_NOP 0, implicit $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
+ S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
+
+...
>From ce41a137c0d6527a4751da8413c8206e702a0d53 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Wed, 24 Jul 2024 16:01:08 +0530
Subject: [PATCH 7/8] [AMDGPU] renamed test file
---
...e-index.mir => s_copy_frame_index_elimination_failure_pei.mir} | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename llvm/test/CodeGen/AMDGPU/{reduce-frame-index.mir => s_copy_frame_index_elimination_failure_pei.mir} (100%)
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir b/llvm/test/CodeGen/AMDGPU/s_copy_frame_index_elimination_failure_pei.mir
similarity index 100%
rename from llvm/test/CodeGen/AMDGPU/reduce-frame-index.mir
rename to llvm/test/CodeGen/AMDGPU/s_copy_frame_index_elimination_failure_pei.mir
>From 4254f3b2d568a5606dd61f03ee0e58b4417f15b2 Mon Sep 17 00:00:00 2001
From: PankajDwivedi-25 <pankajkumar.divedi at amd.com>
Date: Thu, 25 Jul 2024 01:25:15 +0530
Subject: [PATCH 8/8] [WIP] replaced v_mad_i32_i24 with unsigned version,
refactored nested loop
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 23 +++++++++++--------
...re_pei.mir => s_mov_lowering_handling.mir} | 4 ++--
2 files changed, 15 insertions(+), 12 deletions(-)
rename llvm/test/CodeGen/AMDGPU/{s_copy_frame_index_elimination_failure_pei.mir => s_mov_lowering_handling.mir} (99%)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 27485be56b8de..0baec28c98b97 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2557,27 +2557,30 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addReg(FrameReg);
MachineInstrBuilder Add;
- if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS)) ==
+ if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS)) !=
nullptr) {
+ if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
+ BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32),
+ ResultReg)
+ .addImm(Offset);
+ Add.addReg(ResultReg, RegState::Kill)
+ .addReg(TmpResultReg, RegState::Kill)
+ .addImm(0);
+ } else
+ Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
+ } else {
// VCC is live and no SGPR is free.
// since emergency stack slot is already used for spilling VGPR
// scavenged? This a way around to avoid carry, need follow-up.
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ResultReg)
.addImm(Offset);
- Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_I32_I24_e64),
+ Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_U32_U24_e64),
TmpResultReg)
.addReg(TmpResultReg, RegState::Kill)
.addImm(1)
.addReg(ResultReg, RegState::Kill)
.addImm(0);
- } else if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
- BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32), ResultReg)
- .addImm(Offset);
- Add.addReg(ResultReg, RegState::Kill)
- .addReg(TmpResultReg, RegState::Kill)
- .addImm(0);
- } else
- Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill);
+ }
Register NewDest = IsCopy ? ResultReg
: RS->scavengeRegisterBackwards(
diff --git a/llvm/test/CodeGen/AMDGPU/s_copy_frame_index_elimination_failure_pei.mir b/llvm/test/CodeGen/AMDGPU/s_mov_lowering_handling.mir
similarity index 99%
rename from llvm/test/CodeGen/AMDGPU/s_copy_frame_index_elimination_failure_pei.mir
rename to llvm/test/CodeGen/AMDGPU/s_mov_lowering_handling.mir
index 4f08011d7b009..d19f24da04017 100644
--- a/llvm/test/CodeGen/AMDGPU/s_copy_frame_index_elimination_failure_pei.mir
+++ b/llvm/test/CodeGen/AMDGPU/s_mov_lowering_handling.mir
@@ -370,7 +370,7 @@ body: |
; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
; GFX8-NEXT: $sgpr4 = S_MOV_B32 64
- ; GFX8-NEXT: $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr0, 1, killed $sgpr4, 0, implicit $exec
+ ; GFX8-NEXT: $vgpr0 = V_MAD_U32_U24_e64 killed $vgpr0, 1, killed $sgpr4, 0, implicit $exec
; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@@ -485,7 +485,7 @@ body: |
; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
; GFX8-NEXT: $vgpr0 = V_LSHR_B32_e64 6, $sgpr32, implicit $exec
; GFX8-NEXT: $sgpr4 = S_MOV_B32 68
- ; GFX8-NEXT: $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr0, 1, killed $sgpr4, 0, implicit $exec
+ ; GFX8-NEXT: $vgpr0 = V_MAD_U32_U24_e64 killed $vgpr0, 1, killed $sgpr4, 0, implicit $exec
; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
More information about the llvm-commits
mailing list