[llvm] [AMDGPU] Support alloca in AS0 (PR #136584)

Shilei Tian via llvm-commits llvm-commits@lists.llvm.org
Tue Apr 22 17:36:36 PDT 2025


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/136584

From 22cdc022c12cd0a1583b4d9db7cd23210d58bfa9 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i@tianshilei.me>
Date: Tue, 22 Apr 2025 20:36:20 -0400
Subject: [PATCH] [AMDGPU] Support alloca in AS0

This PR lowers an alloca in AS0 (the flat address space) to an alloca in AS5
(the private address space) followed by an addrspacecast of the result back to
AS0.
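
For illustration, a hand-written IR sketch of the intended lowering (the names
%priv and %n are introduced here for illustration and are not part of the
patch): a static alloca in AS0 such as

  %alloca = alloca i32, align 4

is lowered as if it had been written as

  %priv = alloca i32, align 4, addrspace(5)
  %alloca = addrspacecast ptr addrspace(5) %priv to ptr

and a dynamic alloca such as

  %alloca = alloca i32, i32 %n, align 4

is handled analogously: the allocation itself is performed on the private
(AS5) stack, and only the resulting pointer is cast back to the flat (AS0)
address space.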
---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |   3 +
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  33 ++-
 llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h  |   5 +
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  29 ++-
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |   1 +
 llvm/test/CodeGen/AMDGPU/alloca-as0.ll        | 208 ++++++++++++++++++
 .../AMDGPU/assert-wrong-alloca-addrspace.ll   |  16 --
 7 files changed, 276 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/alloca-as0.ll
 delete mode 100644 llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 2846405a2538c..ddc61a219eb83 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -385,9 +385,12 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
 
   setOperationAction({ISD::BR_JT, ISD::BRIND}, MVT::Other, Expand);
 
+  setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
+
   // For R600, this is totally unsupported, just custom lower to produce an
   // error.
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
 
   // Library functions.  These default to Expand, but we have instructions
   // for them.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index beb6432170970..4d7002db2cca7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -912,12 +912,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
       .widenScalarToNextPow2(0, 32)
       .clampMaxNumElements(0, S32, 16);
 
-  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({PrivatePtr});
+  getActionDefinitionsBuilder(G_FRAME_INDEX)
+      .legalFor({PrivatePtr})
+      .customFor({FlatPtr});
 
   // If the amount is divergent, we have to do a wave reduction to get the
   // maximum value, so this is expanded during RegBankSelect.
   getActionDefinitionsBuilder(G_DYN_STACKALLOC)
-    .legalFor({{PrivatePtr, S32}});
+      .legalFor({{PrivatePtr, S32}})
+      .customFor({FlatPtr, S32});
 
   getActionDefinitionsBuilder(G_STACKSAVE)
     .customFor({PrivatePtr});
@@ -2221,6 +2224,10 @@ bool AMDGPULegalizerInfo::legalizeCustom(
     return legalizeTrap(MI, MRI, B);
   case TargetOpcode::G_DEBUGTRAP:
     return legalizeDebugTrap(MI, MRI, B);
+  case TargetOpcode::G_FRAME_INDEX:
+    return legalizeFrameIndex(MI, MRI, B);
+  case TargetOpcode::G_DYN_STACKALLOC:
+    return legalizeDynStackAlloc(MI, MRI, B);
   default:
     return false;
   }
@@ -7668,3 +7675,25 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
 
   return true;
 }
+
+bool AMDGPULegalizerInfo::legalizeFrameIndex(MachineInstr &MI,
+                                             MachineRegisterInfo &MRI,
+                                             MachineIRBuilder &B) const {
+  MachineInstrBuilder FI = B.buildFrameIndex(
+      LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), MI.getOperand(1).getIndex());
+  B.buildAddrSpaceCast(MI.getOperand(0).getReg(), FI);
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeDynStackAlloc(MachineInstr &MI,
+                                                MachineRegisterInfo &MRI,
+                                                MachineIRBuilder &B) const {
+  MachineInstrBuilder Size = B.buildTrunc(S32, MI.getOperand(1));
+  Align Alignment(MI.getOperand(2).getImm());
+  MachineInstrBuilder DynStackAlloc = B.buildDynStackAlloc(
+      LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), Size, Alignment);
+  B.buildAddrSpaceCast(MI.getOperand(0).getReg(), DynStackAlloc);
+  MI.eraseFromParent();
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 1f4e02b0d600a..55250530689cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -246,6 +246,11 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
 
   bool legalizeIntrinsic(LegalizerHelper &Helper,
                          MachineInstr &MI) const override;
+
+  bool legalizeFrameIndex(MachineInstr &MI, MachineRegisterInfo &MRI,
+                          MachineIRBuilder &B) const;
+  bool legalizeDynStackAlloc(MachineInstr &MI, MachineRegisterInfo &MRI,
+                             MachineIRBuilder &B) const;
 };
 } // End llvm namespace.
 #endif
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2e3cd5ca6692d..3f2e5fbce03a1 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4117,6 +4117,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
                          InVals, /*IsThisReturn=*/false, SDValue());
 }
 
+SDValue SITargetLowering::lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
+  // Since address space information is lost here, we assume that an i64 frame
+  // index comes from an alloca in AS0.
+  SDLoc DL(Op);
+  auto *FI = cast<FrameIndexSDNode>(Op);
+  SDValue TFI = DAG.getFrameIndex(FI->getIndex(), MVT::i32);
+  return DAG.getAddrSpaceCast(DL, Op.getValueType(), TFI,
+                              AMDGPUAS::PRIVATE_ADDRESS,
+                              AMDGPUAS::FLAT_ADDRESS);
+}
+
 // This is similar to the default implementation in ExpandDYNAMIC_STACKALLOC,
 // except for:
 // 1. Stack growth direction (default: downwards, AMDGPU: upwards), and
@@ -4129,13 +4140,27 @@ SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   SDLoc dl(Op);
   EVT VT = Op.getValueType();
   SDValue Chain = Op.getOperand(0);
+  SDValue Size = Op.getOperand(1);
+
+  // Since address space information is lost here, we assume that an i64 dynamic
+  // alloca comes from an alloca in AS0.
+  if (VT == MVT::i64) {
+    SDValue Align = Op.getOperand(2);
+    Size = DAG.getZExtOrTrunc(Size, dl, MVT::i32);
+    SDValue Ops[] = {Chain, Size, Align};
+    SDValue DynAlloc =
+        DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, {MVT::i32, MVT::Other}, Ops);
+    SDValue Cast = DAG.getAddrSpaceCast(
+        dl, VT, DynAlloc, AMDGPUAS::PRIVATE_ADDRESS, AMDGPUAS::FLAT_ADDRESS);
+    return DAG.getMergeValues({Cast, DynAlloc.getValue(1)}, dl);
+  }
+
   Register SPReg = Info->getStackPtrOffsetReg();
 
   // Chain the dynamic stack allocation so that it doesn't modify the stack
   // pointer when other instructions are using the stack.
   Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
 
-  SDValue Size = Op.getOperand(1);
   SDValue BaseAddr = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
   Align Alignment = cast<ConstantSDNode>(Op.getOperand(2))->getAlignValue();
 
@@ -6087,6 +6112,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SMUL_LOHI:
   case ISD::UMUL_LOHI:
     return lowerXMUL_LOHI(Op, DAG);
+  case ISD::FrameIndex:
+    return lowerFrameIndex(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     return LowerDYNAMIC_STACKALLOC(Op, DAG);
   case ISD::STACKSAVE:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index c42366a1c04c8..f08cd15282c94 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -428,6 +428,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue LowerCall(CallLoweringInfo &CLI,
                     SmallVectorImpl<SDValue> &InVals) const override;
 
+  SDValue lowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AMDGPU/alloca-as0.ll b/llvm/test/CodeGen/AMDGPU/alloca-as0.ll
new file mode 100644
index 0000000000000..b0ff79c89d82e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/alloca-as0.ll
@@ -0,0 +1,208 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 %s -o - | FileCheck %s --check-prefix=ISEL
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel %s -o - | FileCheck %s --check-prefix=GI
+
+declare void @bar(ptr)
+
+define i32 @static_alloca() {
+; ISEL-LABEL: static_alloca:
+; ISEL:       ; %bb.0:
+; ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ISEL-NEXT:    s_mov_b32 s16, s33
+; ISEL-NEXT:    s_mov_b32 s33, s32
+; ISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; ISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; ISEL-NEXT:    s_mov_b64 exec, s[18:19]
+; ISEL-NEXT:    s_addk_i32 s32, 0x400
+; ISEL-NEXT:    v_writelane_b32 v40, s16, 4
+; ISEL-NEXT:    s_getpc_b64 s[16:17]
+; ISEL-NEXT:    s_add_u32 s16, s16, bar@gotpcrel32@lo+4
+; ISEL-NEXT:    s_addc_u32 s17, s17, bar@gotpcrel32@hi+12
+; ISEL-NEXT:    v_writelane_b32 v40, s30, 0
+; ISEL-NEXT:    s_load_dwordx2 s[16:17], s[16:17], 0x0
+; ISEL-NEXT:    s_lshr_b32 s18, s33, 6
+; ISEL-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
+; ISEL-NEXT:    v_writelane_b32 v40, s31, 1
+; ISEL-NEXT:    s_cmp_lg_u32 s18, -1
+; ISEL-NEXT:    v_readfirstlane_b32 s18, v0
+; ISEL-NEXT:    v_writelane_b32 v40, s34, 2
+; ISEL-NEXT:    s_cselect_b32 s34, s18, 0
+; ISEL-NEXT:    s_mov_b64 s[18:19], src_private_base
+; ISEL-NEXT:    v_writelane_b32 v40, s35, 3
+; ISEL-NEXT:    s_cselect_b32 s35, s19, 0
+; ISEL-NEXT:    v_mov_b32_e32 v0, s34
+; ISEL-NEXT:    v_mov_b32_e32 v1, s35
+; ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; ISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; ISEL-NEXT:    v_mov_b32_e32 v0, s34
+; ISEL-NEXT:    v_mov_b32_e32 v1, s35
+; ISEL-NEXT:    flat_load_dword v0, v[0:1]
+; ISEL-NEXT:    v_readlane_b32 s35, v40, 3
+; ISEL-NEXT:    v_readlane_b32 s34, v40, 2
+; ISEL-NEXT:    v_readlane_b32 s31, v40, 1
+; ISEL-NEXT:    v_readlane_b32 s30, v40, 0
+; ISEL-NEXT:    s_mov_b32 s32, s33
+; ISEL-NEXT:    v_readlane_b32 s4, v40, 4
+; ISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; ISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; ISEL-NEXT:    s_mov_b64 exec, s[6:7]
+; ISEL-NEXT:    s_mov_b32 s33, s4
+; ISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GI-LABEL: static_alloca:
+; GI:       ; %bb.0:
+; GI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GI-NEXT:    s_mov_b32 s16, s33
+; GI-NEXT:    s_mov_b32 s33, s32
+; GI-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GI-NEXT:    s_mov_b64 exec, s[18:19]
+; GI-NEXT:    v_writelane_b32 v40, s16, 4
+; GI-NEXT:    v_writelane_b32 v40, s30, 0
+; GI-NEXT:    v_writelane_b32 v40, s31, 1
+; GI-NEXT:    s_addk_i32 s32, 0x400
+; GI-NEXT:    v_writelane_b32 v40, s34, 2
+; GI-NEXT:    s_lshr_b32 s34, s33, 6
+; GI-NEXT:    s_getpc_b64 s[18:19]
+; GI-NEXT:    s_add_u32 s18, s18, bar@gotpcrel32@lo+4
+; GI-NEXT:    s_addc_u32 s19, s19, bar@gotpcrel32@hi+12
+; GI-NEXT:    s_load_dwordx2 s[18:19], s[18:19], 0x0
+; GI-NEXT:    s_mov_b64 s[16:17], src_private_base
+; GI-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
+; GI-NEXT:    v_mov_b32_e32 v1, s17
+; GI-NEXT:    v_writelane_b32 v40, s35, 3
+; GI-NEXT:    s_mov_b32 s35, s17
+; GI-NEXT:    s_waitcnt lgkmcnt(0)
+; GI-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GI-NEXT:    v_mov_b32_e32 v0, s34
+; GI-NEXT:    v_mov_b32_e32 v1, s35
+; GI-NEXT:    flat_load_dword v0, v[0:1]
+; GI-NEXT:    v_readlane_b32 s35, v40, 3
+; GI-NEXT:    v_readlane_b32 s34, v40, 2
+; GI-NEXT:    v_readlane_b32 s31, v40, 1
+; GI-NEXT:    v_readlane_b32 s30, v40, 0
+; GI-NEXT:    s_mov_b32 s32, s33
+; GI-NEXT:    v_readlane_b32 s4, v40, 4
+; GI-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GI-NEXT:    s_mov_b64 exec, s[6:7]
+; GI-NEXT:    s_mov_b32 s33, s4
+; GI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GI-NEXT:    s_setpc_b64 s[30:31]
+  %alloca = alloca i32, align 4
+  call void @bar(ptr %alloca)
+  %load = load i32, ptr %alloca
+  ret i32 %load
+}
+
+define i32 @dynamic_alloca(i32 %n) {
+; ISEL-LABEL: dynamic_alloca:
+; ISEL:       ; %bb.0:
+; ISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ISEL-NEXT:    s_mov_b32 s16, s33
+; ISEL-NEXT:    s_mov_b32 s33, s32
+; ISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; ISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; ISEL-NEXT:    s_mov_b64 exec, s[18:19]
+; ISEL-NEXT:    v_writelane_b32 v40, s16, 2
+; ISEL-NEXT:    v_mad_u64_u32 v[0:1], s[16:17], v0, 4, 15
+; ISEL-NEXT:    v_writelane_b32 v40, s30, 0
+; ISEL-NEXT:    s_mov_b32 s18, 0
+; ISEL-NEXT:    v_and_b32_e32 v0, -16, v0
+; ISEL-NEXT:    s_mov_b64 s[16:17], exec
+; ISEL-NEXT:    s_addk_i32 s32, 0x400
+; ISEL-NEXT:    v_writelane_b32 v40, s31, 1
+; ISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; ISEL-NEXT:    s_ff1_i32_b64 s19, s[16:17]
+; ISEL-NEXT:    v_readlane_b32 s20, v0, s19
+; ISEL-NEXT:    s_bitset0_b64 s[16:17], s19
+; ISEL-NEXT:    s_max_u32 s18, s18, s20
+; ISEL-NEXT:    s_cmp_lg_u64 s[16:17], 0
+; ISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; ISEL-NEXT:  ; %bb.2:
+; ISEL-NEXT:    s_cmp_lg_u32 s32, -1
+; ISEL-NEXT:    s_mov_b64 s[16:17], src_private_base
+; ISEL-NEXT:    s_cselect_b32 s19, s32, 0
+; ISEL-NEXT:    s_cselect_b32 s20, s17, 0
+; ISEL-NEXT:    s_getpc_b64 s[16:17]
+; ISEL-NEXT:    s_add_u32 s16, s16, bar@gotpcrel32@lo+4
+; ISEL-NEXT:    s_addc_u32 s17, s17, bar@gotpcrel32@hi+12
+; ISEL-NEXT:    s_load_dwordx2 s[16:17], s[16:17], 0x0
+; ISEL-NEXT:    v_mov_b32_e32 v0, s32
+; ISEL-NEXT:    v_lshl_add_u32 v0, s18, 6, v0
+; ISEL-NEXT:    v_readfirstlane_b32 s18, v0
+; ISEL-NEXT:    v_mov_b32_e32 v0, s19
+; ISEL-NEXT:    v_mov_b32_e32 v1, s20
+; ISEL-NEXT:    s_mov_b32 s32, s18
+; ISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; ISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; ISEL-NEXT:    v_mov_b32_e32 v0, 0
+; ISEL-NEXT:    v_readlane_b32 s31, v40, 1
+; ISEL-NEXT:    v_readlane_b32 s30, v40, 0
+; ISEL-NEXT:    s_mov_b32 s32, s33
+; ISEL-NEXT:    v_readlane_b32 s4, v40, 2
+; ISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; ISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; ISEL-NEXT:    s_mov_b64 exec, s[6:7]
+; ISEL-NEXT:    s_mov_b32 s33, s4
+; ISEL-NEXT:    s_waitcnt vmcnt(0)
+; ISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GI-LABEL: dynamic_alloca:
+; GI:       ; %bb.0:
+; GI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GI-NEXT:    s_mov_b32 s16, s33
+; GI-NEXT:    s_mov_b32 s33, s32
+; GI-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GI-NEXT:    s_mov_b64 exec, s[18:19]
+; GI-NEXT:    v_mov_b32_e32 v1, 0
+; GI-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
+; GI-NEXT:    v_writelane_b32 v40, s16, 2
+; GI-NEXT:    v_add_co_u32_e32 v0, vcc, 15, v0
+; GI-NEXT:    v_writelane_b32 v40, s30, 0
+; GI-NEXT:    s_mov_b32 s18, 0
+; GI-NEXT:    v_and_b32_e32 v0, -16, v0
+; GI-NEXT:    s_mov_b64 s[16:17], exec
+; GI-NEXT:    s_addk_i32 s32, 0x400
+; GI-NEXT:    v_writelane_b32 v40, s31, 1
+; GI-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GI-NEXT:    s_ff1_i32_b64 s19, s[16:17]
+; GI-NEXT:    v_readlane_b32 s20, v0, s19
+; GI-NEXT:    s_bitset0_b64 s[16:17], s19
+; GI-NEXT:    s_max_u32 s18, s18, s20
+; GI-NEXT:    s_cmp_lg_u64 s[16:17], 0
+; GI-NEXT:    s_cbranch_scc1 .LBB1_1
+; GI-NEXT:  ; %bb.2:
+; GI-NEXT:    s_mov_b32 s16, s32
+; GI-NEXT:    s_lshl_b32 s17, s18, 6
+; GI-NEXT:    s_add_u32 s32, s16, s17
+; GI-NEXT:    s_mov_b64 s[18:19], src_private_base
+; GI-NEXT:    s_mov_b32 s17, s19
+; GI-NEXT:    s_cmp_lg_u32 s16, -1
+; GI-NEXT:    s_cselect_b64 s[16:17], s[16:17], 0
+; GI-NEXT:    s_getpc_b64 s[18:19]
+; GI-NEXT:    s_add_u32 s18, s18, bar@gotpcrel32@lo+4
+; GI-NEXT:    s_addc_u32 s19, s19, bar@gotpcrel32@hi+12
+; GI-NEXT:    s_load_dwordx2 s[18:19], s[18:19], 0x0
+; GI-NEXT:    v_mov_b32_e32 v0, s16
+; GI-NEXT:    v_mov_b32_e32 v1, s17
+; GI-NEXT:    s_waitcnt lgkmcnt(0)
+; GI-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GI-NEXT:    v_mov_b32_e32 v0, 0
+; GI-NEXT:    v_readlane_b32 s31, v40, 1
+; GI-NEXT:    v_readlane_b32 s30, v40, 0
+; GI-NEXT:    s_mov_b32 s32, s33
+; GI-NEXT:    v_readlane_b32 s4, v40, 2
+; GI-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GI-NEXT:    s_mov_b64 exec, s[6:7]
+; GI-NEXT:    s_mov_b32 s33, s4
+; GI-NEXT:    s_waitcnt vmcnt(0)
+; GI-NEXT:    s_setpc_b64 s[30:31]
+  %alloca = alloca i32, i32 %n, align 4
+  call void @bar(ptr %alloca)
+  %load = load i32, ptr %alloca
+  ret i32 0
+}
diff --git a/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll b/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll
deleted file mode 100644
index 1e72e679e83c0..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck %s
-
-; The alloca has the wrong address space and is passed to a call. The
-; FrameIndex was created with the natural 32-bit pointer type instead
-; of the declared 64-bit. Make sure we don't assert.
-
-; CHECK: LLVM ERROR: Cannot select: {{.*}}: i64 = FrameIndex<0>
-
-declare void @func(ptr)
-
-define void @main() {
-bb:
-  %alloca = alloca i32, align 4
-  call void @func(ptr %alloca)
-  ret void
-}


