[llvm-branch-commits] [llvm] [AMDGPU] Make `AllocaInst` return AS5 in `getAssumedAddrSpace` (PR #136798)

Wed Apr 23 13:48:30 PDT 2025

https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/136798

>From 9d2612c4379eb827406642b508f2dce32fc13e59 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 23 Apr 2025 09:17:46 -0400
Subject: [PATCH] [AMDGPU] Make `AllocaInst` return AS5 in
 `getAssumedAddrSpace`

---
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   3 +
 llvm/test/CodeGen/AMDGPU/alloca-as0.ll        | 122 ++++++++----------
 .../InferAddressSpaces/AMDGPU/alloca-as0.ll   |  35 +++++
 3 files changed, 90 insertions(+), 70 deletions(-)
 create mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/alloca-as0.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b6cc5137d711a..2c4052a30b10f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -951,6 +951,9 @@ bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
 }
 
 unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
+  if (isa<AllocaInst>(V))
+    return AMDGPUAS::PRIVATE_ADDRESS;
+
   const auto *LD = dyn_cast<LoadInst>(V);
   if (!LD) // TODO: Handle invariant load like constant.
     return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
diff --git a/llvm/test/CodeGen/AMDGPU/alloca-as0.ll b/llvm/test/CodeGen/AMDGPU/alloca-as0.ll
index 9fcb362c153ba..5172ff011e45f 100644
--- a/llvm/test/CodeGen/AMDGPU/alloca-as0.ll
+++ b/llvm/test/CodeGen/AMDGPU/alloca-as0.ll
@@ -14,7 +14,7 @@ define i32 @static_alloca() {
 ; ISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; ISEL-NEXT:    s_mov_b64 exec, s[18:19]
 ; ISEL-NEXT:    s_addk_i32 s32, 0x400
-; ISEL-NEXT:    v_writelane_b32 v40, s16, 4
+; ISEL-NEXT:    v_writelane_b32 v40, s16, 3
 ; ISEL-NEXT:    s_getpc_b64 s[16:17]
 ; ISEL-NEXT:    s_add_u32 s16, s16, bar@rel32@lo+4
 ; ISEL-NEXT:    s_addc_u32 s17, s17, bar@rel32@hi+12
@@ -27,25 +27,22 @@ define i32 @static_alloca() {
 ; ISEL-NEXT:    v_writelane_b32 v40, s34, 2
 ; ISEL-NEXT:    s_cselect_b32 s34, s18, 0
 ; ISEL-NEXT:    s_mov_b64 s[18:19], src_private_base
-; ISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; ISEL-NEXT:    s_cselect_b32 s35, s19, 0
+; ISEL-NEXT:    s_cselect_b32 s18, s19, 0
 ; ISEL-NEXT:    v_mov_b32_e32 v0, s34
-; ISEL-NEXT:    v_mov_b32_e32 v1, s35
+; ISEL-NEXT:    v_mov_b32_e32 v1, s18
 ; ISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; ISEL-NEXT:    v_mov_b32_e32 v0, s34
-; ISEL-NEXT:    v_mov_b32_e32 v1, s35
-; ISEL-NEXT:    flat_load_dword v0, v[0:1]
-; ISEL-NEXT:    v_readlane_b32 s35, v40, 3
+; ISEL-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
 ; ISEL-NEXT:    v_readlane_b32 s34, v40, 2
 ; ISEL-NEXT:    v_readlane_b32 s31, v40, 1
 ; ISEL-NEXT:    v_readlane_b32 s30, v40, 0
 ; ISEL-NEXT:    s_mov_b32 s32, s33
-; ISEL-NEXT:    v_readlane_b32 s4, v40, 4
+; ISEL-NEXT:    v_readlane_b32 s4, v40, 3
 ; ISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; ISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
 ; ISEL-NEXT:    s_mov_b64 exec, s[6:7]
 ; ISEL-NEXT:    s_mov_b32 s33, s4
-; ISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ISEL-NEXT:    s_waitcnt vmcnt(0)
 ; ISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GI-LABEL: static_alloca:
@@ -56,35 +53,27 @@ define i32 @static_alloca() {
 ; GI-NEXT:    s_or_saveexec_b64 s[18:19], -1
 ; GI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GI-NEXT:    s_mov_b64 exec, s[18:19]
-; GI-NEXT:    v_writelane_b32 v40, s16, 4
-; GI-NEXT:    v_writelane_b32 v40, s30, 0
-; GI-NEXT:    v_writelane_b32 v40, s31, 1
+; GI-NEXT:    v_writelane_b32 v40, s16, 2
 ; GI-NEXT:    s_addk_i32 s32, 0x400
-; GI-NEXT:    v_writelane_b32 v40, s34, 2
-; GI-NEXT:    s_lshr_b32 s34, s33, 6
 ; GI-NEXT:    s_mov_b64 s[16:17], src_private_base
+; GI-NEXT:    v_writelane_b32 v40, s30, 0
 ; GI-NEXT:    s_getpc_b64 s[18:19]
 ; GI-NEXT:    s_add_u32 s18, s18, bar@rel32@lo+4
 ; GI-NEXT:    s_addc_u32 s19, s19, bar@rel32@hi+12
 ; GI-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
 ; GI-NEXT:    v_mov_b32_e32 v1, s17
-; GI-NEXT:    v_writelane_b32 v40, s35, 3
-; GI-NEXT:    s_mov_b32 s35, s17
+; GI-NEXT:    v_writelane_b32 v40, s31, 1
 ; GI-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GI-NEXT:    v_mov_b32_e32 v0, s34
-; GI-NEXT:    v_mov_b32_e32 v1, s35
-; GI-NEXT:    flat_load_dword v0, v[0:1]
-; GI-NEXT:    v_readlane_b32 s35, v40, 3
-; GI-NEXT:    v_readlane_b32 s34, v40, 2
+; GI-NEXT:    buffer_load_dword v0, off, s[0:3], s33
 ; GI-NEXT:    v_readlane_b32 s31, v40, 1
 ; GI-NEXT:    v_readlane_b32 s30, v40, 0
 ; GI-NEXT:    s_mov_b32 s32, s33
-; GI-NEXT:    v_readlane_b32 s4, v40, 4
+; GI-NEXT:    v_readlane_b32 s4, v40, 2
 ; GI-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
 ; GI-NEXT:    s_mov_b64 exec, s[6:7]
 ; GI-NEXT:    s_mov_b32 s33, s4
-; GI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GI-NEXT:    s_waitcnt vmcnt(0)
 ; GI-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca i32, align 4
   call void @bar(ptr %alloca)
@@ -112,19 +101,18 @@ define amdgpu_kernel void @static_alloca_kernel(ptr %p) {
 ; ISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; ISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; ISEL-NEXT:    s_cselect_b32 s33, 0, 0
-; ISEL-NEXT:    s_cselect_b32 s36, s15, 0
+; ISEL-NEXT:    s_cselect_b32 s15, s15, 0
 ; ISEL-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; ISEL-NEXT:    s_mov_b32 s14, s16
 ; ISEL-NEXT:    v_mov_b32_e32 v0, s33
-; ISEL-NEXT:    v_mov_b32_e32 v1, s36
+; ISEL-NEXT:    v_mov_b32_e32 v1, s15
 ; ISEL-NEXT:    s_movk_i32 s32, 0x400
 ; ISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; ISEL-NEXT:    v_mov_b32_e32 v0, s33
-; ISEL-NEXT:    v_mov_b32_e32 v1, s36
-; ISEL-NEXT:    flat_load_dword v2, v[0:1]
+; ISEL-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen
 ; ISEL-NEXT:    v_mov_b32_e32 v0, s34
 ; ISEL-NEXT:    v_mov_b32_e32 v1, s35
-; ISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ISEL-NEXT:    s_waitcnt vmcnt(0)
 ; ISEL-NEXT:    flat_store_dword v[0:1], v2
 ; ISEL-NEXT:    s_endpgm
 ;
@@ -138,10 +126,10 @@ define amdgpu_kernel void @static_alloca_kernel(ptr %p) {
 ; GI-NEXT:    s_add_u32 s8, s8, 8
 ; GI-NEXT:    s_mov_b32 s13, s15
 ; GI-NEXT:    s_mov_b32 s12, s14
+; GI-NEXT:    s_mov_b64 s[14:15], src_private_base
 ; GI-NEXT:    s_addc_u32 s9, s9, 0
 ; GI-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GI-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GI-NEXT:    s_mov_b64 s[14:15], src_private_base
 ; GI-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GI-NEXT:    s_getpc_b64 s[18:19]
 ; GI-NEXT:    s_add_u32 s18, s18, bar@rel32@lo+4
@@ -150,15 +138,11 @@ define amdgpu_kernel void @static_alloca_kernel(ptr %p) {
 ; GI-NEXT:    v_mov_b32_e32 v1, s15
 ; GI-NEXT:    s_mov_b32 s14, s16
 ; GI-NEXT:    s_movk_i32 s32, 0x400
-; GI-NEXT:    s_mov_b32 s36, 0
-; GI-NEXT:    s_mov_b32 s37, s15
 ; GI-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GI-NEXT:    v_mov_b32_e32 v0, s36
-; GI-NEXT:    v_mov_b32_e32 v1, s37
-; GI-NEXT:    flat_load_dword v2, v[0:1]
+; GI-NEXT:    buffer_load_dword v2, off, s[0:3], 0
 ; GI-NEXT:    v_mov_b32_e32 v0, s34
 ; GI-NEXT:    v_mov_b32_e32 v1, s35
-; GI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GI-NEXT:    s_waitcnt vmcnt(0)
 ; GI-NEXT:    flat_store_dword v[0:1], v2
 ; GI-NEXT:    s_endpgm
   %alloca = alloca i32, align 4
@@ -279,24 +263,24 @@ define amdgpu_kernel void @dynamic_alloca_i32_kernel(i32 %n, ptr %p) {
 ; ISEL-LABEL: dynamic_alloca_i32_kernel:
 ; ISEL:       ; %bb.0:
 ; ISEL-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
+; ISEL-NEXT:    s_mov_b32 s12, s14
+; ISEL-NEXT:    s_load_dword s14, s[8:9], 0x0
+; ISEL-NEXT:    s_load_dwordx2 s[34:35], s[8:9], 0x8
 ; ISEL-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
 ; ISEL-NEXT:    s_add_u32 s0, s0, s17
-; ISEL-NEXT:    s_load_dword s17, s[8:9], 0x0
-; ISEL-NEXT:    s_load_dwordx2 s[34:35], s[8:9], 0x8
-; ISEL-NEXT:    s_movk_i32 s32, 0x400
 ; ISEL-NEXT:    s_addc_u32 s1, s1, 0
-; ISEL-NEXT:    s_mov_b32 s13, s15
-; ISEL-NEXT:    s_mov_b32 s12, s14
-; ISEL-NEXT:    s_mov_b64 s[14:15], src_private_base
-; ISEL-NEXT:    s_cmp_lg_u32 s32, -1
-; ISEL-NEXT:    s_cselect_b32 s15, s15, 0
-; ISEL-NEXT:    s_cselect_b32 s20, s32, 0
 ; ISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; ISEL-NEXT:    s_lshl_b32 s14, s17, 2
+; ISEL-NEXT:    s_lshl_b32 s14, s14, 2
 ; ISEL-NEXT:    s_add_i32 s14, s14, 15
 ; ISEL-NEXT:    s_and_b32 s14, s14, -16
+; ISEL-NEXT:    s_movk_i32 s32, 0x400
 ; ISEL-NEXT:    s_lshl_b32 s14, s14, 6
-; ISEL-NEXT:    s_add_i32 s32, s32, s14
+; ISEL-NEXT:    s_add_i32 s17, s32, s14
+; ISEL-NEXT:    s_mov_b32 s13, s15
+; ISEL-NEXT:    s_cmp_lg_u32 s32, -1
+; ISEL-NEXT:    s_mov_b64 s[14:15], src_private_base
+; ISEL-NEXT:    s_cselect_b32 s36, s32, 0
+; ISEL-NEXT:    s_cselect_b32 s15, s15, 0
 ; ISEL-NEXT:    s_add_u32 s8, s8, 16
 ; ISEL-NEXT:    s_addc_u32 s9, s9, 0
 ; ISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
@@ -306,16 +290,16 @@ define amdgpu_kernel void @dynamic_alloca_i32_kernel(i32 %n, ptr %p) {
 ; ISEL-NEXT:    s_addc_u32 s19, s19, bar@rel32@hi+12
 ; ISEL-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; ISEL-NEXT:    s_mov_b32 s14, s16
-; ISEL-NEXT:    v_mov_b32_e32 v0, s20
+; ISEL-NEXT:    v_mov_b32_e32 v0, s36
 ; ISEL-NEXT:    v_mov_b32_e32 v1, s15
 ; ISEL-NEXT:    s_mov_b32 s33, 0
-; ISEL-NEXT:    v_mov_b32_e32 v40, s20
-; ISEL-NEXT:    v_mov_b32_e32 v41, s15
+; ISEL-NEXT:    s_mov_b32 s32, s17
 ; ISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; ISEL-NEXT:    flat_load_dword v2, v[40:41]
+; ISEL-NEXT:    v_mov_b32_e32 v0, s36
+; ISEL-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen
 ; ISEL-NEXT:    v_mov_b32_e32 v0, s34
 ; ISEL-NEXT:    v_mov_b32_e32 v1, s35
-; ISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ISEL-NEXT:    s_waitcnt vmcnt(0)
 ; ISEL-NEXT:    flat_store_dword v[0:1], v2
 ; ISEL-NEXT:    s_endpgm
 ;
@@ -356,11 +340,10 @@ define amdgpu_kernel void @dynamic_alloca_i32_kernel(i32 %n, ptr %p) {
 ; GI-NEXT:    s_mov_b32 s33, 0
 ; GI-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GI-NEXT:    v_mov_b32_e32 v0, s36
-; GI-NEXT:    v_mov_b32_e32 v1, s37
-; GI-NEXT:    flat_load_dword v2, v[0:1]
+; GI-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen
 ; GI-NEXT:    v_mov_b32_e32 v0, s34
 ; GI-NEXT:    v_mov_b32_e32 v1, s35
-; GI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GI-NEXT:    s_waitcnt vmcnt(0)
 ; GI-NEXT:    flat_store_dword v[0:1], v2
 ; GI-NEXT:    s_endpgm
   %alloca = alloca i32, i32 %n, align 4
@@ -478,24 +461,24 @@ define i32 @dynamic_alloca_i64(i64 %n) {
 define amdgpu_kernel void @dynamic_alloca_i64_kernel(i64 %n, ptr %p) {
 ; ISEL-LABEL: dynamic_alloca_i64_kernel:
 ; ISEL:       ; %bb.0:
-; ISEL-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
 ; ISEL-NEXT:    s_load_dwordx4 s[20:23], s[8:9], 0x0
+; ISEL-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
 ; ISEL-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
 ; ISEL-NEXT:    s_add_u32 s0, s0, s17
-; ISEL-NEXT:    s_movk_i32 s32, 0x400
 ; ISEL-NEXT:    s_addc_u32 s1, s1, 0
-; ISEL-NEXT:    s_mov_b32 s13, s15
 ; ISEL-NEXT:    s_mov_b32 s12, s14
-; ISEL-NEXT:    s_mov_b64 s[14:15], src_private_base
-; ISEL-NEXT:    s_cmp_lg_u32 s32, -1
-; ISEL-NEXT:    s_cselect_b32 s15, s15, 0
-; ISEL-NEXT:    s_cselect_b32 s17, s32, 0
 ; ISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; ISEL-NEXT:    s_lshl_b32 s14, s20, 2
 ; ISEL-NEXT:    s_add_i32 s14, s14, 15
 ; ISEL-NEXT:    s_and_b32 s14, s14, -16
+; ISEL-NEXT:    s_movk_i32 s32, 0x400
 ; ISEL-NEXT:    s_lshl_b32 s14, s14, 6
-; ISEL-NEXT:    s_add_i32 s32, s32, s14
+; ISEL-NEXT:    s_add_i32 s17, s32, s14
+; ISEL-NEXT:    s_mov_b32 s13, s15
+; ISEL-NEXT:    s_cmp_lg_u32 s32, -1
+; ISEL-NEXT:    s_mov_b64 s[14:15], src_private_base
+; ISEL-NEXT:    s_cselect_b32 s34, s32, 0
+; ISEL-NEXT:    s_cselect_b32 s15, s15, 0
 ; ISEL-NEXT:    s_add_u32 s8, s8, 16
 ; ISEL-NEXT:    s_addc_u32 s9, s9, 0
 ; ISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
@@ -505,16 +488,16 @@ define amdgpu_kernel void @dynamic_alloca_i64_kernel(i64 %n, ptr %p) {
 ; ISEL-NEXT:    s_addc_u32 s19, s19, bar@rel32@hi+12
 ; ISEL-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; ISEL-NEXT:    s_mov_b32 s14, s16
-; ISEL-NEXT:    v_mov_b32_e32 v0, s17
+; ISEL-NEXT:    v_mov_b32_e32 v0, s34
 ; ISEL-NEXT:    v_mov_b32_e32 v1, s15
 ; ISEL-NEXT:    s_mov_b32 s33, 0
 ; ISEL-NEXT:    v_mov_b32_e32 v40, s22
 ; ISEL-NEXT:    v_mov_b32_e32 v41, s23
-; ISEL-NEXT:    v_mov_b32_e32 v42, s17
-; ISEL-NEXT:    v_mov_b32_e32 v43, s15
+; ISEL-NEXT:    s_mov_b32 s32, s17
 ; ISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; ISEL-NEXT:    flat_load_dword v0, v[42:43]
-; ISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ISEL-NEXT:    v_mov_b32_e32 v0, s34
+; ISEL-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
+; ISEL-NEXT:    s_waitcnt vmcnt(0)
 ; ISEL-NEXT:    flat_store_dword v[40:41], v0
 ; ISEL-NEXT:    s_endpgm
 ;
@@ -553,11 +536,10 @@ define amdgpu_kernel void @dynamic_alloca_i64_kernel(i64 %n, ptr %p) {
 ; GI-NEXT:    s_mov_b32 s33, 0
 ; GI-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GI-NEXT:    v_mov_b32_e32 v0, s34
-; GI-NEXT:    v_mov_b32_e32 v1, s35
-; GI-NEXT:    flat_load_dword v2, v[0:1]
+; GI-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen
 ; GI-NEXT:    v_mov_b32_e32 v0, s38
 ; GI-NEXT:    v_mov_b32_e32 v1, s39
-; GI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GI-NEXT:    s_waitcnt vmcnt(0)
 ; GI-NEXT:    flat_store_dword v[0:1], v2
 ; GI-NEXT:    s_endpgm
   %alloca = alloca i32, i64 %n, align 4
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/alloca-as0.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/alloca-as0.ll
new file mode 100644
index 0000000000000..57dcd96594893
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/alloca-as0.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s -o - | FileCheck %s
+; Loads through an alloca that yields a generic `ptr` should be rewritten to addrspace(5).
+declare void @bar(ptr)
+
+define i32 @static_alloca() {
+; CHECK-LABEL: define i32 @static_alloca() {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[ALLOCA]] to ptr addrspace(5)
+; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
+; CHECK-NEXT:    call void @bar(ptr [[TMP2]])
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %alloca = alloca i32, align 4
+  call void @bar(ptr %alloca)
+  %load = load i32, ptr %alloca
+  ret i32 %load
+}
+
+define i32 @dynamic_alloca(i32 %n) {
+; CHECK-LABEL: define i32 @dynamic_alloca(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i32, i32 [[N]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[ALLOCA]] to ptr addrspace(5)
+; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
+; CHECK-NEXT:    call void @bar(ptr [[TMP2]])
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %alloca = alloca i32, i32 %n, align 4
+  call void @bar(ptr %alloca)
+  %load = load i32, ptr %alloca
+  ret i32 %load
+}