[llvm-branch-commits] [llvm] AMDGPU: Support VALU add instructions in localstackalloc (PR #101692)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Aug 2 12:46:33 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/101692
>From 46e241ee3ef338031e7aa08b0289f0ec60c7940f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 2 Aug 2024 15:49:44 +0400
Subject: [PATCH] AMDGPU: Support VALU add instructions in localstackalloc
Pre-enable this optimization before allowing folds of frame
indexes into add instructions. Disables this fold when using
scratch instructions for now. I see some code size improvements
with it, but the optimization needs to be smarter about the
uses depending on the register classes.
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 164 ++++
llvm/lib/Target/AMDGPU/SOPInstructions.td | 4 +-
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 12 +-
...local-stack-alloc-add-references.gfx10.mir | 80 ++
.../local-stack-alloc-add-references.gfx8.mir | 863 ++++++++++++++++++
.../local-stack-alloc-add-references.gfx9.mir | 522 +++++++++++
6 files changed, 1641 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx8.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx9.mir
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index ba49f4a309ebb..f5ac8517c5069 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -797,6 +797,23 @@ int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::V_ADD_U32_e32:
+ case AMDGPU::V_ADD_U32_e64:
+ case AMDGPU::V_ADD_CO_U32_e32: {
+ int OtherIdx = Idx == 1 ? 2 : 1;
+ const MachineOperand &OtherOp = MI->getOperand(OtherIdx);
+ return OtherOp.isImm() ? OtherOp.getImm() : 0;
+ }
+ case AMDGPU::V_ADD_CO_U32_e64: {
+ int OtherIdx = Idx == 2 ? 3 : 2;
+ const MachineOperand &OtherOp = MI->getOperand(OtherIdx);
+ return OtherOp.isImm() ? OtherOp.getImm() : 0;
+ }
+ default:
+ break;
+ }
+
if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
return 0;
@@ -809,7 +826,60 @@ int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
return getScratchInstrOffset(MI);
}
+static bool isFIPlusImmOrVGPR(const SIRegisterInfo &TRI,
+ const MachineInstr &MI) {
+ assert(MI.getDesc().isAdd());
+ const MachineOperand &Src0 = MI.getOperand(1);
+ const MachineOperand &Src1 = MI.getOperand(2);
+
+ if (Src0.isFI()) {
+ return Src1.isImm() || (Src1.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
+ Src1.getReg()));
+ }
+
+ if (Src1.isFI()) {
+ return Src0.isImm() || (Src0.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
+ Src0.getReg()));
+ }
+
+ return false;
+}
+
bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ // TODO: Handle v_add_co_u32, v_or_b32, v_and_b32 and scalar opcodes.
+ switch (MI->getOpcode()) {
+ case AMDGPU::V_ADD_U32_e32: {
+ // TODO: We could handle this but it requires work to avoid violating
+ // operand restrictions.
+ if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e32) < 2 &&
+ !isFIPlusImmOrVGPR(*this, *MI))
+ return false;
+ [[fallthrough]];
+ }
+ case AMDGPU::V_ADD_U32_e64:
+ // FIXME: This optimization is barely profitable enableFlatScratch as-is.
+ //
+ // Much of the benefit with the MUBUF handling is we avoid duplicating the
+ // shift of the frame register, which isn't needed with scratch.
+ //
+ // materializeFrameBaseRegister doesn't know the register classes of the
+ // uses, and unconditionally uses an s_add_i32, which will end up using a
+ // copy for the vector uses.
+ return !ST.enableFlatScratch();
+ case AMDGPU::V_ADD_CO_U32_e32:
+ if (ST.getConstantBusLimit(AMDGPU::V_ADD_CO_U32_e32) < 2 &&
+ !isFIPlusImmOrVGPR(*this, *MI))
+ return false;
+ // We can't deal with the case where the carry out has a use (though this
+ // should never happen)
+ return MI->getOperand(3).isDead();
+ case AMDGPU::V_ADD_CO_U32_e64:
+ // TODO: Should we check use_empty instead?
+ return MI->getOperand(1).isDead();
+ default:
+ break;
+ }
+
if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
return false;
@@ -860,6 +930,8 @@ Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addFrameIndex(FrameIdx);
if (ST.enableFlatScratch() ) {
+ // FIXME: Mark scc as dead
+ // FIXME: Make sure scc isn't live in.
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
.addReg(OffsetReg, RegState::Kill)
.addReg(FIReg);
@@ -877,6 +949,86 @@ Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const {
const SIInstrInfo *TII = ST.getInstrInfo();
+
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_ADD_U32_e32:
+ case AMDGPU::V_ADD_CO_U32_e32: {
+ MachineOperand *FIOp = &MI.getOperand(2);
+ MachineOperand *ImmOp = &MI.getOperand(1);
+ if (!FIOp->isFI())
+ std::swap(FIOp, ImmOp);
+
+ if (!ImmOp->isImm()) {
+ assert(Offset == 0);
+ FIOp->ChangeToRegister(BaseReg, false);
+ TII->legalizeOperandsVOP2(MI.getMF()->getRegInfo(), MI);
+ return;
+ }
+
+ int64_t TotalOffset = ImmOp->getImm() + Offset;
+ if (TotalOffset == 0) {
+ MI.setDesc(TII->get(AMDGPU::COPY));
+ for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
+ MI.removeOperand(I);
+
+ MI.getOperand(1).ChangeToRegister(BaseReg, false);
+ return;
+ }
+
+ ImmOp->setImm(TotalOffset);
+
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // FIXME: materializeFrameBaseRegister does not know the register class of
+ // the uses of the frame index, and assumes SGPR for enableFlatScratch. Emit
+ // a copy so we have a legal operand and hope the register coalescer can
+ // clean it up.
+ if (isSGPRReg(MRI, BaseReg)) {
+ Register BaseRegVGPR =
+ MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), BaseRegVGPR)
+ .addReg(BaseReg);
+ MI.getOperand(2).ChangeToRegister(BaseRegVGPR, false);
+ } else {
+ MI.getOperand(2).ChangeToRegister(BaseReg, false);
+ }
+ return;
+ }
+ case AMDGPU::V_ADD_U32_e64:
+ case AMDGPU::V_ADD_CO_U32_e64: {
+ int Src0Idx = MI.getNumExplicitDefs();
+ MachineOperand *FIOp = &MI.getOperand(Src0Idx);
+ MachineOperand *ImmOp = &MI.getOperand(Src0Idx + 1);
+ if (!FIOp->isFI())
+ std::swap(FIOp, ImmOp);
+
+ if (!ImmOp->isImm()) {
+ FIOp->ChangeToRegister(BaseReg, false);
+ TII->legalizeOperandsVOP3(MI.getMF()->getRegInfo(), MI);
+ return;
+ }
+
+ int64_t TotalOffset = ImmOp->getImm() + Offset;
+ if (TotalOffset == 0) {
+ MI.setDesc(TII->get(AMDGPU::COPY));
+
+ for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
+ MI.removeOperand(I);
+
+ MI.getOperand(1).ChangeToRegister(BaseReg, false);
+ } else {
+ FIOp->ChangeToRegister(BaseReg, false);
+ ImmOp->setImm(TotalOffset);
+ }
+
+ return;
+ }
+ default:
+ break;
+ }
+
bool IsFlat = TII->isFLATScratch(MI);
#ifndef NDEBUG
@@ -925,6 +1077,18 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
Register BaseReg,
int64_t Offset) const {
+
+ switch (MI->getOpcode()) {
+ case AMDGPU::V_ADD_U32_e32:
+ case AMDGPU::V_ADD_CO_U32_e32:
+ return true;
+ case AMDGPU::V_ADD_U32_e64:
+ case AMDGPU::V_ADD_CO_U32_e64:
+ return ST.hasVOP3Literal() || AMDGPU::isInlinableIntLiteral(Offset);
+ default:
+ break;
+ }
+
if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
return false;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index de3191bd91df6..2e73a1a15f6b3 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -640,12 +640,12 @@ class SOP2_64_32_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
let Defs = [SCC] in { // Carry out goes to SCC
-let isCommutable = 1 in {
+let isCommutable = 1, isAdd = 1 in {
def S_ADD_U32 : SOP2_32 <"s_add_u32">;
def S_ADD_I32 : SOP2_32 <"s_add_i32",
[(set i32:$sdst, (UniformBinFrag<add> SSrc_b32:$src0, SSrc_b32:$src1))]
>;
-} // End isCommutable = 1
+} // End isCommutable = 1, isAdd = 1
def S_SUB_U32 : SOP2_32 <"s_sub_u32">;
def S_SUB_I32 : SOP2_32 <"s_sub_i32",
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 44eb5f5abafe0..d17b4f2408131 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -763,7 +763,11 @@ def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
-defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>;
+
+let isAdd = 1 in {
+ defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>;
+}
+
defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
@@ -772,7 +776,11 @@ defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_f
let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
-defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>;
+
+let isAdd = 1 in {
+ defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>;
+}
+
defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
}
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir
new file mode 100644
index 0000000000000..0c31b36e90cb0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx10.mir
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX12 %s
+
+---
+name: local_stack_alloc__v_add_u32_e64__literal_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets
+ ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], 256, 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets
+ ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 256, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 512, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %0:vgpr_32 = V_ADD_U32_e64 %stack.0, 256, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_U32_e64 %stack.0, 512, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
+ ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 256, [[V_ADD_U32_e64_]], 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX10-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_]], -156, 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__literal_offsets_commute
+ ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 256, %stack.0, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 512, %stack.0, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX12-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 100, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_2]]
+ ; GFX12-NEXT: SI_RETURN
+ %0:vgpr_32 = V_ADD_U32_e64 256, %stack.0, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_U32_e64 512, %stack.0, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ %2:vgpr_32 = V_ADD_U32_e64 %stack.0, 100, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %2
+ SI_RETURN
+
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx8.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx8.mir
new file mode 100644
index 0000000000000..b7ade2147e40c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx8.mir
@@ -0,0 +1,863 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX803 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX900 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX940 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -mattr=+wavefrontsize64 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+wavefrontsize64 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX12 %s
+
+---
+name: local_stack_alloc__v_add_co_u32_e32__literal_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX803-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets
+ ; GFX803: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
+ ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX803-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_CO_U32_e64_]]
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, [[V_ADD_CO_U32_e64_]], implicit-def dead $vcc, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX803-NEXT: SI_RETURN
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets
+ ; GFX900: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, [[V_ADD_U32_e64_]], implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets
+ ; GFX940: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
+ ; GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
+ ; GFX940-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def $scc
+ ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, [[COPY1]], implicit-def dead $vcc, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets
+ ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, [[V_ADD_U32_e64_]], implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets
+ ; GFX12: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
+ ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
+ ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def $scc
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, [[COPY1]], implicit-def dead $vcc, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX12-NEXT: SI_RETURN
+ %0:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def dead $vcc, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX803-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc
+ ; GFX803: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc
+ ; GFX803-NEXT: SI_RETURN
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc
+ ; GFX900: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc
+ ; GFX940: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc
+ ; GFX10: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc
+ ; GFX12: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc
+ ; GFX12-NEXT: SI_RETURN
+ %0:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1, implicit $vcc
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX803-LABEL: name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets
+ ; GFX803: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX803-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_CO_U32_e64_]]
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 8, [[V_ADD_CO_U32_e64_]], implicit-def dead $vcc, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX803-NEXT: SI_RETURN
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets
+ ; GFX900: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 8, [[V_ADD_U32_e64_]], implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets
+ ; GFX940: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
+ ; GFX940-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def $scc
+ ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 8, [[COPY1]], implicit-def dead $vcc, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets
+ ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 8, [[V_ADD_U32_e64_]], implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets
+ ; GFX12: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
+ ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def $scc
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 8, [[COPY1]], implicit-def dead $vcc, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX12-NEXT: SI_RETURN
+ %0:vgpr_32 = V_ADD_CO_U32_e32 8, %stack.0, implicit-def dead $vcc, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_CO_U32_e32 16, %stack.0, implicit-def dead $vcc, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX803-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets
+ ; GFX803: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX803-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_CO_U32_e64_]]
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 8, [[V_ADD_CO_U32_e64_]], 0, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX803-NEXT: SI_RETURN
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets
+ ; GFX900: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 8, [[V_ADD_U32_e64_]], 0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets
+ ; GFX940: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
+ ; GFX940-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def $scc
+ ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 8, [[S_ADD_I32_]], 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets
+ ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 8, [[V_ADD_U32_e64_]], 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets
+ ; GFX12: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
+ ; GFX12-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def $scc
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 8, [[S_ADD_I32_]], 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX12-NEXT: SI_RETURN
+ %0:vgpr_32, dead %2:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX803-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc
+ ; GFX803: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX803-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]]
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc
+ ; GFX900: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX900-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]]
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc
+ ; GFX940: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX940-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]]
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc
+ ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]]
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc
+ ; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX12-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]]
+ %0:vgpr_32, %2:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN implicit %2
+
+...
+
+---
+name: local_stack_alloc__s_add_i32__literal_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX803-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets
+ ; GFX803: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]]
+ ; GFX803-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]]
+ ; GFX803-NEXT: SI_RETURN
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets
+ ; GFX900: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]]
+ ; GFX900-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets
+ ; GFX940: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]]
+ ; GFX940-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets
+ ; GFX10: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]]
+ ; GFX10-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets
+ ; GFX12: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]]
+ ; GFX12-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %0:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, %0
+ %1:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__s_add_i32__inline_imm_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX803-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets
+ ; GFX803: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_]]
+ ; GFX803-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]]
+ ; GFX803-NEXT: SI_RETURN
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets
+ ; GFX900: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_]]
+ ; GFX900-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets
+ ; GFX940: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_]]
+ ; GFX940-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets
+ ; GFX10: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_]]
+ ; GFX10-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets
+ ; GFX12: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_]]
+ ; GFX12-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %0:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:SREG_32 */, %0
+ %1:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:SREG_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__s_add_i32__literal_offsets_live_scc
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX803-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc
+ ; GFX803: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]]
+ ; GFX803-NEXT: S_NOP 0, implicit $scc
+ ; GFX803-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]]
+ ; GFX803-NEXT: SI_RETURN implicit $scc
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc
+ ; GFX900: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]]
+ ; GFX900-NEXT: S_NOP 0, implicit $scc
+ ; GFX900-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]]
+ ; GFX900-NEXT: SI_RETURN implicit $scc
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc
+ ; GFX940: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]]
+ ; GFX940-NEXT: S_NOP 0, implicit $scc
+ ; GFX940-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]]
+ ; GFX940-NEXT: SI_RETURN implicit $scc
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc
+ ; GFX10: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]]
+ ; GFX10-NEXT: S_NOP 0, implicit $scc
+ ; GFX10-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]]
+ ; GFX10-NEXT: SI_RETURN implicit $scc
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc
+ ; GFX12: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]]
+ ; GFX12-NEXT: S_NOP 0, implicit $scc
+ ; GFX12-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]]
+ ; GFX12-NEXT: SI_RETURN implicit $scc
+ %0:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, %0
+ S_NOP 0, implicit $scc
+ %1:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, %1
+ SI_RETURN implicit $scc
+
+...
+
+---
+name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX803-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets
+ ; GFX803: liveins: $vgpr0
+ ; GFX803-NEXT: {{ $}}
+ ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX803-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX803-NEXT: SI_RETURN
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets
+ ; GFX900: liveins: $vgpr0
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets
+ ; GFX940: liveins: $vgpr0
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0
+ ; GFX940-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets
+ ; GFX12: liveins: $vgpr0
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0
+ ; GFX12-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %vgpr_offset:vgpr_32 = COPY $vgpr0
+ %0:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_CO_U32_e32 %vgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX803-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute
+ ; GFX803: liveins: $vgpr0
+ ; GFX803-NEXT: {{ $}}
+ ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX803-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX803-NEXT: SI_RETURN
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute
+ ; GFX900: liveins: $vgpr0
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute
+ ; GFX940: liveins: $vgpr0
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0
+ ; GFX940-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute
+ ; GFX12: liveins: $vgpr0
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0
+ ; GFX12-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %vgpr_offset:vgpr_32 = COPY $vgpr0
+ %0:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, %vgpr_offset, implicit-def dead $vcc, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr8
+ ; GFX803-LABEL: name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets
+ ; GFX803: liveins: $sgpr8
+ ; GFX803-NEXT: {{ $}}
+ ; GFX803-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX803-NEXT: SI_RETURN
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets
+ ; GFX900: liveins: $sgpr8
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets
+ ; GFX940: liveins: $sgpr8
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets
+ ; GFX10: liveins: $sgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, [[V_MOV_B32_e32_]], implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets
+ ; GFX12: liveins: $sgpr8
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0
+ ; GFX12-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, [[COPY]], implicit-def dead $vcc, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]]
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, [[COPY1]], implicit-def dead $vcc, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %sgpr_offset:sreg_32 = COPY $sgpr8
+ %0:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr8
+ ; GFX803-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets
+ ; GFX803: liveins: $sgpr8
+ ; GFX803-NEXT: {{ $}}
+ ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX803-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX803-NEXT: SI_RETURN
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets
+ ; GFX900: liveins: $sgpr8
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets
+ ; GFX940: liveins: $sgpr8
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0
+ ; GFX940-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[COPY]], 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[COPY1]], 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets
+ ; GFX10: liveins: $sgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets
+ ; GFX12: liveins: $sgpr8
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0
+ ; GFX12-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[S_MOV_B32_]], 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[S_MOV_B32_]], 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX12-NEXT: SI_RETURN
+ %sgpr_offset:sreg_32 = COPY $sgpr8
+ %0:vgpr_32, dead %2:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr8
+ ; GFX803-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute
+ ; GFX803: liveins: $sgpr8
+ ; GFX803-NEXT: {{ $}}
+ ; GFX803-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX803-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX803-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec
+ ; GFX803-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX803-NEXT: SI_RETURN
+ ;
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute
+ ; GFX900: liveins: $sgpr8
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX900-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute
+ ; GFX940: liveins: $sgpr8
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0
+ ; GFX940-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY %sgpr_offset
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[COPY]], 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY %sgpr_offset
+ ; GFX940-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[COPY1]], 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute
+ ; GFX10: liveins: $sgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute
+ ; GFX12: liveins: $sgpr8
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0
+ ; GFX12-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], %sgpr_offset, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]]
+ ; GFX12-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], %sgpr_offset, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]]
+ ; GFX12-NEXT: SI_RETURN
+ %sgpr_offset:sreg_32 = COPY $sgpr8
+ %0:vgpr_32, dead %2:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx9.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx9.mir
new file mode 100644
index 0000000000000..dd1f8eb2d781a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx9.mir
@@ -0,0 +1,522 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX900 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX940 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX12 %s
+
+---
+name: local_stack_alloc__v_add_u32_e32__literal_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_u32_e32__literal_offsets
+ ; GFX900: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX900-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 256, [[V_ADD_U32_e64_]], implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e32__literal_offsets
+ ; GFX940: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 256, %stack.0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX940-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 512, %stack.0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__literal_offsets
+ ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX10-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 256, [[V_ADD_U32_e64_]], implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e32__literal_offsets
+ ; GFX12: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 256, %stack.0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX12-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 512, %stack.0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %0:vgpr_32 = V_ADD_U32_e32 256, %stack.0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_U32_e32 512, %stack.0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_u32_e32__inline_imm_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_u32_e32__inline_imm_offsets
+ ; GFX900: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX900-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 8, [[V_ADD_U32_e64_]], implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e32__inline_imm_offsets
+ ; GFX940: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 8, %stack.0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX940-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 16, %stack.0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__inline_imm_offsets
+ ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX10-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 8, [[V_ADD_U32_e64_]], implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e32__inline_imm_offsets
+ ; GFX12: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 8, %stack.0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX12-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 16, %stack.0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %0:vgpr_32 = V_ADD_U32_e32 8, %stack.0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_U32_e32 16, %stack.0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets
+ ; GFX900: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX900-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 8, [[V_ADD_U32_e64_]], 0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets
+ ; GFX940: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX940-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets
+ ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 8, [[V_ADD_U32_e64_]], 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets
+ ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %0:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_u32_e32__vgpr_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets
+ ; GFX900: liveins: $vgpr0
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX900-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX900-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets
+ ; GFX940: liveins: $vgpr0
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX940-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX940-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX10-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, [[V_MOV_B32_e32_]], implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets
+ ; GFX12: liveins: $vgpr0
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX12-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX12-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %vgpr_offset:vgpr_32 = COPY $vgpr0
+ %0:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_u32_e32__vgpr_offsets_commute
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets_commute
+ ; GFX900: liveins: $vgpr0
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX900-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX900-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets_commute
+ ; GFX940: liveins: $vgpr0
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX940-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX940-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets_commute
+ ; GFX10: liveins: $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX10-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_MOV_B32_e32_]], %vgpr_offset, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets_commute
+ ; GFX12: liveins: $vgpr0
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0
+ ; GFX12-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX12-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %vgpr_offset:vgpr_32 = COPY $vgpr0
+ %0:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_u32_e32__sgpr_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr8
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_u32_e32__sgpr_offsets
+ ; GFX900: liveins: $sgpr8
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX900-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX900-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e32__sgpr_offsets
+ ; GFX940: liveins: $sgpr8
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX940-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX940-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__sgpr_offsets
+ ; GFX10: liveins: $sgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX10-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, [[V_MOV_B32_e32_]], implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX10-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, [[V_MOV_B32_e32_]], implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e32__sgpr_offsets
+ ; GFX12: liveins: $sgpr8
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX12-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]]
+ ; GFX12-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %sgpr_offset:sreg_32 = COPY $sgpr8
+ %0:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_u32_e64__sgpr_offsets
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr8
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets
+ ; GFX900: liveins: $sgpr8
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX900-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets
+ ; GFX940: liveins: $sgpr8
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX940-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX940-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets
+ ; GFX10: liveins: $sgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets
+ ; GFX12: liveins: $sgpr8
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX12-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %sgpr_offset:sreg_32 = COPY $sgpr8
+ %0:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_u32_e64__sgpr_offsets_commute
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr8
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets_commute
+ ; GFX900: liveins: $sgpr8
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX900-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets_commute
+ ; GFX940: liveins: $sgpr8
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX940-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX940-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets_commute
+ ; GFX10: liveins: $sgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], %sgpr_offset, 0, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets_commute
+ ; GFX12: liveins: $sgpr8
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8
+ ; GFX12-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %sgpr_offset:sreg_32 = COPY $sgpr8
+ %0:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
+
+---
+name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets_clamp_modifier
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX900-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets_clamp_modifier
+ ; GFX900: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX900-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX900-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX900-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 8, [[V_ADD_U32_e64_]], 1, implicit $exec
+ ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX900-NEXT: SI_RETURN
+ ;
+ ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets_clamp_modifier
+ ; GFX940: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 1, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX940-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 1, implicit $exec
+ ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX940-NEXT: SI_RETURN
+ ;
+ ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets_clamp_modifier
+ ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[S_MOV_B32_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_ADD_U32_e64_]]
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]]
+ ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 8, [[V_ADD_U32_e64_]], 1, implicit $exec
+ ; GFX10-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX10-NEXT: SI_RETURN
+ ;
+ ; GFX12-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets_clamp_modifier
+ ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 1, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]]
+ ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 1, implicit $exec
+ ; GFX12-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]]
+ ; GFX12-NEXT: SI_RETURN
+ %0:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, /*clamp*/1, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %0
+ %1:vgpr_32 = V_ADD_U32_e64 16, %stack.0, /*clamp*/1, implicit $exec
+ INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, %1
+ SI_RETURN
+
+...
More information about the llvm-branch-commits
mailing list