[llvm] [AMDGPU] Support alloca in AS0 (PR #136584)
Shilei Tian via llvm-commits
llvm-commits@lists.llvm.org
Tue Apr 22 17:36:36 PDT 2025
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/136584
>From 22cdc022c12cd0a1583b4d9db7cd23210d58bfa9 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i@tianshilei.me>
Date: Tue, 22 Apr 2025 20:36:20 -0400
Subject: [PATCH] [AMDGPU] Support alloca in AS0
This PR lowers an alloca in AS0 (the flat address space) to an alloca in AS5
(the private address space) followed by an addrspacecast back to AS0. Both the
SelectionDAG and GlobalISel paths are covered: static allocas go through a
custom FrameIndex lowering, and dynamic allocas through a custom
DYNAMIC_STACKALLOC lowering.
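
As a rough illustration (the rewrite happens during instruction selection, not
as an IR pass; the function names below are only illustrative, while @bar
matches the callee used in the new test), a static alloca such as

  declare void @bar(ptr)

  define void @example() {
    ; Flat (AS0) pointer produced directly by the alloca.
    %p = alloca i32, align 4
    call void @bar(ptr %p)
    ret void
  }

is selected as if it had been written as

  define void @example_lowered() {
    ; The stack object lives in the private address space (AS5) ...
    %frame = alloca i32, align 4, addrspace(5)
    ; ... and is cast back to a flat pointer before any use.
    %p = addrspacecast ptr addrspace(5) %frame to ptr
    call void @bar(ptr %p)
    ret void
  }

Dynamic allocas are handled analogously, with the i64 size truncated to i32
before the private-stack allocation.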
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 3 +
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 33 ++-
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 5 +
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 29 ++-
llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 +
llvm/test/CodeGen/AMDGPU/alloca-as0.ll | 208 ++++++++++++++++++
.../AMDGPU/assert-wrong-alloca-addrspace.ll | 16 --
7 files changed, 276 insertions(+), 19 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/alloca-as0.ll
delete mode 100644 llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 2846405a2538c..ddc61a219eb83 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -385,9 +385,12 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::BR_JT, ISD::BRIND}, MVT::Other, Expand);
+ setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
+
// For R600, this is totally unsupported, just custom lower to produce an
// error.
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
// Library functions. These default to Expand, but we have instructions
// for them.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index beb6432170970..4d7002db2cca7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -912,12 +912,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.widenScalarToNextPow2(0, 32)
.clampMaxNumElements(0, S32, 16);
- getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({PrivatePtr});
+ getActionDefinitionsBuilder(G_FRAME_INDEX)
+ .legalFor({PrivatePtr})
+ .customFor({FlatPtr});
// If the amount is divergent, we have to do a wave reduction to get the
// maximum value, so this is expanded during RegBankSelect.
getActionDefinitionsBuilder(G_DYN_STACKALLOC)
- .legalFor({{PrivatePtr, S32}});
+ .legalFor({{PrivatePtr, S32}})
+ .customFor({FlatPtr, S32});
getActionDefinitionsBuilder(G_STACKSAVE)
.customFor({PrivatePtr});
@@ -2221,6 +2224,10 @@ bool AMDGPULegalizerInfo::legalizeCustom(
return legalizeTrap(MI, MRI, B);
case TargetOpcode::G_DEBUGTRAP:
return legalizeDebugTrap(MI, MRI, B);
+ case TargetOpcode::G_FRAME_INDEX:
+ return legalizeFrameIndex(MI, MRI, B);
+ case TargetOpcode::G_DYN_STACKALLOC:
+ return legalizeDynStackAlloc(MI, MRI, B);
default:
return false;
}
@@ -7668,3 +7675,25 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return true;
}
+
+bool AMDGPULegalizerInfo::legalizeFrameIndex(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ MachineInstrBuilder FI = B.buildFrameIndex(
+ LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), MI.getOperand(1).getIndex());
+ B.buildAddrSpaceCast(MI.getOperand(0).getReg(), FI);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeDynStackAlloc(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ MachineInstrBuilder Size = B.buildTrunc(S32, MI.getOperand(1));
+ Align Alignment(MI.getOperand(2).getImm());
+ MachineInstrBuilder DynStackAlloc = B.buildDynStackAlloc(
+ LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), Size, Alignment);
+ B.buildAddrSpaceCast(MI.getOperand(0).getReg(), DynStackAlloc);
+ MI.eraseFromParent();
+ return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 1f4e02b0d600a..55250530689cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -246,6 +246,11 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const override;
+
+ bool legalizeFrameIndex(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeDynStackAlloc(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
};
} // End llvm namespace.
#endif
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2e3cd5ca6692d..3f2e5fbce03a1 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4117,6 +4117,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
InVals, /*IsThisReturn=*/false, SDValue());
}
+SDValue SITargetLowering::lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
+ // Since address space information is lost here, we assume that an i64 frame
+ // index comes from an alloca in AS0.
+ SDLoc DL(Op);
+ auto *FI = cast<FrameIndexSDNode>(Op);
+ SDValue TFI = DAG.getFrameIndex(FI->getIndex(), MVT::i32);
+ return DAG.getAddrSpaceCast(DL, Op.getValueType(), TFI,
+ AMDGPUAS::PRIVATE_ADDRESS,
+ AMDGPUAS::FLAT_ADDRESS);
+}
+
// This is similar to the default implementation in ExpandDYNAMIC_STACKALLOC,
// except for:
// 1. Stack growth direction(default: downwards, AMDGPU: upwards), and
@@ -4129,13 +4140,27 @@ SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDLoc dl(Op);
EVT VT = Op.getValueType();
SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+
+ // Since address space information is lost here, we assume that an i64 dynamic
+ // alloca comes from an alloca in AS0.
+ if (VT == MVT::i64) {
+ SDValue Align = Op.getOperand(2);
+ Size = DAG.getZExtOrTrunc(Size, dl, MVT::i32);
+ SDValue Ops[] = {Chain, Size, Align};
+ SDValue DynAlloc =
+ DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, {MVT::i32, MVT::Other}, Ops);
+ SDValue Cast = DAG.getAddrSpaceCast(
+ dl, VT, DynAlloc, AMDGPUAS::PRIVATE_ADDRESS, AMDGPUAS::FLAT_ADDRESS);
+ return DAG.getMergeValues({Cast, DynAlloc.getValue(1)}, dl);
+ }
+
Register SPReg = Info->getStackPtrOffsetReg();
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
- SDValue Size = Op.getOperand(1);
SDValue BaseAddr = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Align Alignment = cast<ConstantSDNode>(Op.getOperand(2))->getAlignValue();
@@ -6087,6 +6112,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
return lowerXMUL_LOHI(Op, DAG);
+ case ISD::FrameIndex:
+ return lowerFrameIndex(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::STACKSAVE:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index c42366a1c04c8..f08cd15282c94 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -428,6 +428,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AMDGPU/alloca-as0.ll b/llvm/test/CodeGen/AMDGPU/alloca-as0.ll
new file mode 100644
index 0000000000000..b0ff79c89d82e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/alloca-as0.ll
@@ -0,0 +1,208 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 %s -o - | FileCheck %s --check-prefix=ISEL
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel %s -o - | FileCheck %s --check-prefix=GI
+
+declare void @bar(ptr)
+
+define i32 @static_alloca() {
+; ISEL-LABEL: static_alloca:
+; ISEL: ; %bb.0:
+; ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ISEL-NEXT: s_mov_b32 s16, s33
+; ISEL-NEXT: s_mov_b32 s33, s32
+; ISEL-NEXT: s_or_saveexec_b64 s[18:19], -1
+; ISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; ISEL-NEXT: s_mov_b64 exec, s[18:19]
+; ISEL-NEXT: s_addk_i32 s32, 0x400
+; ISEL-NEXT: v_writelane_b32 v40, s16, 4
+; ISEL-NEXT: s_getpc_b64 s[16:17]
+; ISEL-NEXT: s_add_u32 s16, s16, bar@gotpcrel32@lo+4
+; ISEL-NEXT: s_addc_u32 s17, s17, bar@gotpcrel32@hi+12
+; ISEL-NEXT: v_writelane_b32 v40, s30, 0
+; ISEL-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
+; ISEL-NEXT: s_lshr_b32 s18, s33, 6
+; ISEL-NEXT: v_lshrrev_b32_e64 v0, 6, s33
+; ISEL-NEXT: v_writelane_b32 v40, s31, 1
+; ISEL-NEXT: s_cmp_lg_u32 s18, -1
+; ISEL-NEXT: v_readfirstlane_b32 s18, v0
+; ISEL-NEXT: v_writelane_b32 v40, s34, 2
+; ISEL-NEXT: s_cselect_b32 s34, s18, 0
+; ISEL-NEXT: s_mov_b64 s[18:19], src_private_base
+; ISEL-NEXT: v_writelane_b32 v40, s35, 3
+; ISEL-NEXT: s_cselect_b32 s35, s19, 0
+; ISEL-NEXT: v_mov_b32_e32 v0, s34
+; ISEL-NEXT: v_mov_b32_e32 v1, s35
+; ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; ISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; ISEL-NEXT: v_mov_b32_e32 v0, s34
+; ISEL-NEXT: v_mov_b32_e32 v1, s35
+; ISEL-NEXT: flat_load_dword v0, v[0:1]
+; ISEL-NEXT: v_readlane_b32 s35, v40, 3
+; ISEL-NEXT: v_readlane_b32 s34, v40, 2
+; ISEL-NEXT: v_readlane_b32 s31, v40, 1
+; ISEL-NEXT: v_readlane_b32 s30, v40, 0
+; ISEL-NEXT: s_mov_b32 s32, s33
+; ISEL-NEXT: v_readlane_b32 s4, v40, 4
+; ISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
+; ISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; ISEL-NEXT: s_mov_b64 exec, s[6:7]
+; ISEL-NEXT: s_mov_b32 s33, s4
+; ISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GI-LABEL: static_alloca:
+; GI: ; %bb.0:
+; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GI-NEXT: s_mov_b32 s16, s33
+; GI-NEXT: s_mov_b32 s33, s32
+; GI-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GI-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GI-NEXT: s_mov_b64 exec, s[18:19]
+; GI-NEXT: v_writelane_b32 v40, s16, 4
+; GI-NEXT: v_writelane_b32 v40, s30, 0
+; GI-NEXT: v_writelane_b32 v40, s31, 1
+; GI-NEXT: s_addk_i32 s32, 0x400
+; GI-NEXT: v_writelane_b32 v40, s34, 2
+; GI-NEXT: s_lshr_b32 s34, s33, 6
+; GI-NEXT: s_getpc_b64 s[18:19]
+; GI-NEXT: s_add_u32 s18, s18, bar@gotpcrel32@lo+4
+; GI-NEXT: s_addc_u32 s19, s19, bar@gotpcrel32@hi+12
+; GI-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0
+; GI-NEXT: s_mov_b64 s[16:17], src_private_base
+; GI-NEXT: v_lshrrev_b32_e64 v0, 6, s33
+; GI-NEXT: v_mov_b32_e32 v1, s17
+; GI-NEXT: v_writelane_b32 v40, s35, 3
+; GI-NEXT: s_mov_b32 s35, s17
+; GI-NEXT: s_waitcnt lgkmcnt(0)
+; GI-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GI-NEXT: v_mov_b32_e32 v0, s34
+; GI-NEXT: v_mov_b32_e32 v1, s35
+; GI-NEXT: flat_load_dword v0, v[0:1]
+; GI-NEXT: v_readlane_b32 s35, v40, 3
+; GI-NEXT: v_readlane_b32 s34, v40, 2
+; GI-NEXT: v_readlane_b32 s31, v40, 1
+; GI-NEXT: v_readlane_b32 s30, v40, 0
+; GI-NEXT: s_mov_b32 s32, s33
+; GI-NEXT: v_readlane_b32 s4, v40, 4
+; GI-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GI-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GI-NEXT: s_mov_b64 exec, s[6:7]
+; GI-NEXT: s_mov_b32 s33, s4
+; GI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GI-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca i32, align 4
+ call void @bar(ptr %alloca)
+ %load = load i32, ptr %alloca
+ ret i32 %load
+}
+
+define i32 @dynamic_alloca(i32 %n) {
+; ISEL-LABEL: dynamic_alloca:
+; ISEL: ; %bb.0:
+; ISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ISEL-NEXT: s_mov_b32 s16, s33
+; ISEL-NEXT: s_mov_b32 s33, s32
+; ISEL-NEXT: s_or_saveexec_b64 s[18:19], -1
+; ISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; ISEL-NEXT: s_mov_b64 exec, s[18:19]
+; ISEL-NEXT: v_writelane_b32 v40, s16, 2
+; ISEL-NEXT: v_mad_u64_u32 v[0:1], s[16:17], v0, 4, 15
+; ISEL-NEXT: v_writelane_b32 v40, s30, 0
+; ISEL-NEXT: s_mov_b32 s18, 0
+; ISEL-NEXT: v_and_b32_e32 v0, -16, v0
+; ISEL-NEXT: s_mov_b64 s[16:17], exec
+; ISEL-NEXT: s_addk_i32 s32, 0x400
+; ISEL-NEXT: v_writelane_b32 v40, s31, 1
+; ISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; ISEL-NEXT: s_ff1_i32_b64 s19, s[16:17]
+; ISEL-NEXT: v_readlane_b32 s20, v0, s19
+; ISEL-NEXT: s_bitset0_b64 s[16:17], s19
+; ISEL-NEXT: s_max_u32 s18, s18, s20
+; ISEL-NEXT: s_cmp_lg_u64 s[16:17], 0
+; ISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; ISEL-NEXT: ; %bb.2:
+; ISEL-NEXT: s_cmp_lg_u32 s32, -1
+; ISEL-NEXT: s_mov_b64 s[16:17], src_private_base
+; ISEL-NEXT: s_cselect_b32 s19, s32, 0
+; ISEL-NEXT: s_cselect_b32 s20, s17, 0
+; ISEL-NEXT: s_getpc_b64 s[16:17]
+; ISEL-NEXT: s_add_u32 s16, s16, bar@gotpcrel32@lo+4
+; ISEL-NEXT: s_addc_u32 s17, s17, bar@gotpcrel32@hi+12
+; ISEL-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
+; ISEL-NEXT: v_mov_b32_e32 v0, s32
+; ISEL-NEXT: v_lshl_add_u32 v0, s18, 6, v0
+; ISEL-NEXT: v_readfirstlane_b32 s18, v0
+; ISEL-NEXT: v_mov_b32_e32 v0, s19
+; ISEL-NEXT: v_mov_b32_e32 v1, s20
+; ISEL-NEXT: s_mov_b32 s32, s18
+; ISEL-NEXT: s_waitcnt lgkmcnt(0)
+; ISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; ISEL-NEXT: v_mov_b32_e32 v0, 0
+; ISEL-NEXT: v_readlane_b32 s31, v40, 1
+; ISEL-NEXT: v_readlane_b32 s30, v40, 0
+; ISEL-NEXT: s_mov_b32 s32, s33
+; ISEL-NEXT: v_readlane_b32 s4, v40, 2
+; ISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
+; ISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; ISEL-NEXT: s_mov_b64 exec, s[6:7]
+; ISEL-NEXT: s_mov_b32 s33, s4
+; ISEL-NEXT: s_waitcnt vmcnt(0)
+; ISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GI-LABEL: dynamic_alloca:
+; GI: ; %bb.0:
+; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GI-NEXT: s_mov_b32 s16, s33
+; GI-NEXT: s_mov_b32 s33, s32
+; GI-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GI-NEXT: s_mov_b64 exec, s[18:19]
+; GI-NEXT: v_mov_b32_e32 v1, 0
+; GI-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GI-NEXT: v_writelane_b32 v40, s16, 2
+; GI-NEXT: v_add_co_u32_e32 v0, vcc, 15, v0
+; GI-NEXT: v_writelane_b32 v40, s30, 0
+; GI-NEXT: s_mov_b32 s18, 0
+; GI-NEXT: v_and_b32_e32 v0, -16, v0
+; GI-NEXT: s_mov_b64 s[16:17], exec
+; GI-NEXT: s_addk_i32 s32, 0x400
+; GI-NEXT: v_writelane_b32 v40, s31, 1
+; GI-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GI-NEXT: s_ff1_i32_b64 s19, s[16:17]
+; GI-NEXT: v_readlane_b32 s20, v0, s19
+; GI-NEXT: s_bitset0_b64 s[16:17], s19
+; GI-NEXT: s_max_u32 s18, s18, s20
+; GI-NEXT: s_cmp_lg_u64 s[16:17], 0
+; GI-NEXT: s_cbranch_scc1 .LBB1_1
+; GI-NEXT: ; %bb.2:
+; GI-NEXT: s_mov_b32 s16, s32
+; GI-NEXT: s_lshl_b32 s17, s18, 6
+; GI-NEXT: s_add_u32 s32, s16, s17
+; GI-NEXT: s_mov_b64 s[18:19], src_private_base
+; GI-NEXT: s_mov_b32 s17, s19
+; GI-NEXT: s_cmp_lg_u32 s16, -1
+; GI-NEXT: s_cselect_b64 s[16:17], s[16:17], 0
+; GI-NEXT: s_getpc_b64 s[18:19]
+; GI-NEXT: s_add_u32 s18, s18, bar@gotpcrel32@lo+4
+; GI-NEXT: s_addc_u32 s19, s19, bar@gotpcrel32@hi+12
+; GI-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0
+; GI-NEXT: v_mov_b32_e32 v0, s16
+; GI-NEXT: v_mov_b32_e32 v1, s17
+; GI-NEXT: s_waitcnt lgkmcnt(0)
+; GI-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GI-NEXT: v_mov_b32_e32 v0, 0
+; GI-NEXT: v_readlane_b32 s31, v40, 1
+; GI-NEXT: v_readlane_b32 s30, v40, 0
+; GI-NEXT: s_mov_b32 s32, s33
+; GI-NEXT: v_readlane_b32 s4, v40, 2
+; GI-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GI-NEXT: s_mov_b64 exec, s[6:7]
+; GI-NEXT: s_mov_b32 s33, s4
+; GI-NEXT: s_waitcnt vmcnt(0)
+; GI-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca i32, i32 %n, align 4
+ call void @bar(ptr %alloca)
+ %load = load i32, ptr %alloca
+ ret i32 0
+}
diff --git a/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll b/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll
deleted file mode 100644
index 1e72e679e83c0..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck %s
-
-; The alloca has the wrong address space and is passed to a call. The
-; FrameIndex was created with the natural 32-bit pointer type instead
-; of the declared 64-bit. Make sure we don't assert.
-
-; CHECK: LLVM ERROR: Cannot select: {{.*}}: i64 = FrameIndex<0>
-
-declare void @func(ptr)
-
-define void @main() {
-bb:
- %alloca = alloca i32, align 4
- call void @func(ptr %alloca)
- ret void
-}