[llvm-branch-commits] [llvm] [AMDGPU] Make `AllocaInst` return AS5 in `getAssumedAddrSpace` (PR #136798)
Shilei Tian via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Apr 23 13:48:30 PDT 2025
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/136798
>From 9d2612c4379eb827406642b508f2dce32fc13e59 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 23 Apr 2025 09:17:46 -0400
Subject: [PATCH] [AMDGPU] Make `AllocaInst` return AS5 in
`getAssumedAddrSpace`
---
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 +
llvm/test/CodeGen/AMDGPU/alloca-as0.ll | 122 ++++++++----------
.../InferAddressSpaces/AMDGPU/alloca-as0.ll | 35 +++++
3 files changed, 90 insertions(+), 70 deletions(-)
create mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/alloca-as0.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b6cc5137d711a..2c4052a30b10f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -951,6 +951,9 @@ bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
}
unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
+ if (isa<AllocaInst>(V))
+ return AMDGPUAS::PRIVATE_ADDRESS;
+
const auto *LD = dyn_cast<LoadInst>(V);
if (!LD) // TODO: Handle invariant load like constant.
return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
diff --git a/llvm/test/CodeGen/AMDGPU/alloca-as0.ll b/llvm/test/CodeGen/AMDGPU/alloca-as0.ll
index 9fcb362c153ba..5172ff011e45f 100644
--- a/llvm/test/CodeGen/AMDGPU/alloca-as0.ll
+++ b/llvm/test/CodeGen/AMDGPU/alloca-as0.ll
@@ -14,7 +14,7 @@ define i32 @static_alloca() {
; ISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; ISEL-NEXT: s_mov_b64 exec, s[18:19]
; ISEL-NEXT: s_addk_i32 s32, 0x400
-; ISEL-NEXT: v_writelane_b32 v40, s16, 4
+; ISEL-NEXT: v_writelane_b32 v40, s16, 3
; ISEL-NEXT: s_getpc_b64 s[16:17]
; ISEL-NEXT: s_add_u32 s16, s16, bar@rel32@lo+4
; ISEL-NEXT: s_addc_u32 s17, s17, bar@rel32@hi+12
@@ -27,25 +27,22 @@ define i32 @static_alloca() {
; ISEL-NEXT: v_writelane_b32 v40, s34, 2
; ISEL-NEXT: s_cselect_b32 s34, s18, 0
; ISEL-NEXT: s_mov_b64 s[18:19], src_private_base
-; ISEL-NEXT: v_writelane_b32 v40, s35, 3
-; ISEL-NEXT: s_cselect_b32 s35, s19, 0
+; ISEL-NEXT: s_cselect_b32 s18, s19, 0
; ISEL-NEXT: v_mov_b32_e32 v0, s34
-; ISEL-NEXT: v_mov_b32_e32 v1, s35
+; ISEL-NEXT: v_mov_b32_e32 v1, s18
; ISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; ISEL-NEXT: v_mov_b32_e32 v0, s34
-; ISEL-NEXT: v_mov_b32_e32 v1, s35
-; ISEL-NEXT: flat_load_dword v0, v[0:1]
-; ISEL-NEXT: v_readlane_b32 s35, v40, 3
+; ISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; ISEL-NEXT: v_readlane_b32 s34, v40, 2
; ISEL-NEXT: v_readlane_b32 s31, v40, 1
; ISEL-NEXT: v_readlane_b32 s30, v40, 0
; ISEL-NEXT: s_mov_b32 s32, s33
-; ISEL-NEXT: v_readlane_b32 s4, v40, 4
+; ISEL-NEXT: v_readlane_b32 s4, v40, 3
; ISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
; ISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; ISEL-NEXT: s_mov_b64 exec, s[6:7]
; ISEL-NEXT: s_mov_b32 s33, s4
-; ISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; ISEL-NEXT: s_waitcnt vmcnt(0)
; ISEL-NEXT: s_setpc_b64 s[30:31]
;
; GI-LABEL: static_alloca:
@@ -56,35 +53,27 @@ define i32 @static_alloca() {
; GI-NEXT: s_or_saveexec_b64 s[18:19], -1
; GI-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GI-NEXT: s_mov_b64 exec, s[18:19]
-; GI-NEXT: v_writelane_b32 v40, s16, 4
-; GI-NEXT: v_writelane_b32 v40, s30, 0
-; GI-NEXT: v_writelane_b32 v40, s31, 1
+; GI-NEXT: v_writelane_b32 v40, s16, 2
; GI-NEXT: s_addk_i32 s32, 0x400
-; GI-NEXT: v_writelane_b32 v40, s34, 2
-; GI-NEXT: s_lshr_b32 s34, s33, 6
; GI-NEXT: s_mov_b64 s[16:17], src_private_base
+; GI-NEXT: v_writelane_b32 v40, s30, 0
; GI-NEXT: s_getpc_b64 s[18:19]
; GI-NEXT: s_add_u32 s18, s18, bar@rel32@lo+4
; GI-NEXT: s_addc_u32 s19, s19, bar@rel32@hi+12
; GI-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GI-NEXT: v_mov_b32_e32 v1, s17
-; GI-NEXT: v_writelane_b32 v40, s35, 3
-; GI-NEXT: s_mov_b32 s35, s17
+; GI-NEXT: v_writelane_b32 v40, s31, 1
; GI-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; GI-NEXT: v_mov_b32_e32 v0, s34
-; GI-NEXT: v_mov_b32_e32 v1, s35
-; GI-NEXT: flat_load_dword v0, v[0:1]
-; GI-NEXT: v_readlane_b32 s35, v40, 3
-; GI-NEXT: v_readlane_b32 s34, v40, 2
+; GI-NEXT: buffer_load_dword v0, off, s[0:3], s33
; GI-NEXT: v_readlane_b32 s31, v40, 1
; GI-NEXT: v_readlane_b32 s30, v40, 0
; GI-NEXT: s_mov_b32 s32, s33
-; GI-NEXT: v_readlane_b32 s4, v40, 4
+; GI-NEXT: v_readlane_b32 s4, v40, 2
; GI-NEXT: s_or_saveexec_b64 s[6:7], -1
; GI-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GI-NEXT: s_mov_b64 exec, s[6:7]
; GI-NEXT: s_mov_b32 s33, s4
-; GI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GI-NEXT: s_waitcnt vmcnt(0)
; GI-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, align 4
call void @bar(ptr %alloca)
@@ -112,19 +101,18 @@ define amdgpu_kernel void @static_alloca_kernel(ptr %p) {
; ISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; ISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; ISEL-NEXT: s_cselect_b32 s33, 0, 0
-; ISEL-NEXT: s_cselect_b32 s36, s15, 0
+; ISEL-NEXT: s_cselect_b32 s15, s15, 0
; ISEL-NEXT: v_or3_b32 v31, v0, v1, v2
; ISEL-NEXT: s_mov_b32 s14, s16
; ISEL-NEXT: v_mov_b32_e32 v0, s33
-; ISEL-NEXT: v_mov_b32_e32 v1, s36
+; ISEL-NEXT: v_mov_b32_e32 v1, s15
; ISEL-NEXT: s_movk_i32 s32, 0x400
; ISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
; ISEL-NEXT: v_mov_b32_e32 v0, s33
-; ISEL-NEXT: v_mov_b32_e32 v1, s36
-; ISEL-NEXT: flat_load_dword v2, v[0:1]
+; ISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
; ISEL-NEXT: v_mov_b32_e32 v0, s34
; ISEL-NEXT: v_mov_b32_e32 v1, s35
-; ISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; ISEL-NEXT: s_waitcnt vmcnt(0)
; ISEL-NEXT: flat_store_dword v[0:1], v2
; ISEL-NEXT: s_endpgm
;
@@ -138,10 +126,10 @@ define amdgpu_kernel void @static_alloca_kernel(ptr %p) {
; GI-NEXT: s_add_u32 s8, s8, 8
; GI-NEXT: s_mov_b32 s13, s15
; GI-NEXT: s_mov_b32 s12, s14
+; GI-NEXT: s_mov_b64 s[14:15], src_private_base
; GI-NEXT: s_addc_u32 s9, s9, 0
; GI-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GI-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GI-NEXT: s_mov_b64 s[14:15], src_private_base
; GI-NEXT: v_or3_b32 v31, v0, v1, v2
; GI-NEXT: s_getpc_b64 s[18:19]
; GI-NEXT: s_add_u32 s18, s18, bar@rel32@lo+4
@@ -150,15 +138,11 @@ define amdgpu_kernel void @static_alloca_kernel(ptr %p) {
; GI-NEXT: v_mov_b32_e32 v1, s15
; GI-NEXT: s_mov_b32 s14, s16
; GI-NEXT: s_movk_i32 s32, 0x400
-; GI-NEXT: s_mov_b32 s36, 0
-; GI-NEXT: s_mov_b32 s37, s15
; GI-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; GI-NEXT: v_mov_b32_e32 v0, s36
-; GI-NEXT: v_mov_b32_e32 v1, s37
-; GI-NEXT: flat_load_dword v2, v[0:1]
+; GI-NEXT: buffer_load_dword v2, off, s[0:3], 0
; GI-NEXT: v_mov_b32_e32 v0, s34
; GI-NEXT: v_mov_b32_e32 v1, s35
-; GI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GI-NEXT: s_waitcnt vmcnt(0)
; GI-NEXT: flat_store_dword v[0:1], v2
; GI-NEXT: s_endpgm
%alloca = alloca i32, align 4
@@ -279,24 +263,24 @@ define amdgpu_kernel void @dynamic_alloca_i32_kernel(i32 %n, ptr %p) {
; ISEL-LABEL: dynamic_alloca_i32_kernel:
; ISEL: ; %bb.0:
; ISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; ISEL-NEXT: s_mov_b32 s12, s14
+; ISEL-NEXT: s_load_dword s14, s[8:9], 0x0
+; ISEL-NEXT: s_load_dwordx2 s[34:35], s[8:9], 0x8
; ISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; ISEL-NEXT: s_add_u32 s0, s0, s17
-; ISEL-NEXT: s_load_dword s17, s[8:9], 0x0
-; ISEL-NEXT: s_load_dwordx2 s[34:35], s[8:9], 0x8
-; ISEL-NEXT: s_movk_i32 s32, 0x400
; ISEL-NEXT: s_addc_u32 s1, s1, 0
-; ISEL-NEXT: s_mov_b32 s13, s15
-; ISEL-NEXT: s_mov_b32 s12, s14
-; ISEL-NEXT: s_mov_b64 s[14:15], src_private_base
-; ISEL-NEXT: s_cmp_lg_u32 s32, -1
-; ISEL-NEXT: s_cselect_b32 s15, s15, 0
-; ISEL-NEXT: s_cselect_b32 s20, s32, 0
; ISEL-NEXT: s_waitcnt lgkmcnt(0)
-; ISEL-NEXT: s_lshl_b32 s14, s17, 2
+; ISEL-NEXT: s_lshl_b32 s14, s14, 2
; ISEL-NEXT: s_add_i32 s14, s14, 15
; ISEL-NEXT: s_and_b32 s14, s14, -16
+; ISEL-NEXT: s_movk_i32 s32, 0x400
; ISEL-NEXT: s_lshl_b32 s14, s14, 6
-; ISEL-NEXT: s_add_i32 s32, s32, s14
+; ISEL-NEXT: s_add_i32 s17, s32, s14
+; ISEL-NEXT: s_mov_b32 s13, s15
+; ISEL-NEXT: s_cmp_lg_u32 s32, -1
+; ISEL-NEXT: s_mov_b64 s[14:15], src_private_base
+; ISEL-NEXT: s_cselect_b32 s36, s32, 0
+; ISEL-NEXT: s_cselect_b32 s15, s15, 0
; ISEL-NEXT: s_add_u32 s8, s8, 16
; ISEL-NEXT: s_addc_u32 s9, s9, 0
; ISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
@@ -306,16 +290,16 @@ define amdgpu_kernel void @dynamic_alloca_i32_kernel(i32 %n, ptr %p) {
; ISEL-NEXT: s_addc_u32 s19, s19, bar@rel32@hi+12
; ISEL-NEXT: v_or3_b32 v31, v0, v1, v2
; ISEL-NEXT: s_mov_b32 s14, s16
-; ISEL-NEXT: v_mov_b32_e32 v0, s20
+; ISEL-NEXT: v_mov_b32_e32 v0, s36
; ISEL-NEXT: v_mov_b32_e32 v1, s15
; ISEL-NEXT: s_mov_b32 s33, 0
-; ISEL-NEXT: v_mov_b32_e32 v40, s20
-; ISEL-NEXT: v_mov_b32_e32 v41, s15
+; ISEL-NEXT: s_mov_b32 s32, s17
; ISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; ISEL-NEXT: flat_load_dword v2, v[40:41]
+; ISEL-NEXT: v_mov_b32_e32 v0, s36
+; ISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
; ISEL-NEXT: v_mov_b32_e32 v0, s34
; ISEL-NEXT: v_mov_b32_e32 v1, s35
-; ISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; ISEL-NEXT: s_waitcnt vmcnt(0)
; ISEL-NEXT: flat_store_dword v[0:1], v2
; ISEL-NEXT: s_endpgm
;
@@ -356,11 +340,10 @@ define amdgpu_kernel void @dynamic_alloca_i32_kernel(i32 %n, ptr %p) {
; GI-NEXT: s_mov_b32 s33, 0
; GI-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GI-NEXT: v_mov_b32_e32 v0, s36
-; GI-NEXT: v_mov_b32_e32 v1, s37
-; GI-NEXT: flat_load_dword v2, v[0:1]
+; GI-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
; GI-NEXT: v_mov_b32_e32 v0, s34
; GI-NEXT: v_mov_b32_e32 v1, s35
-; GI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GI-NEXT: s_waitcnt vmcnt(0)
; GI-NEXT: flat_store_dword v[0:1], v2
; GI-NEXT: s_endpgm
%alloca = alloca i32, i32 %n, align 4
@@ -478,24 +461,24 @@ define i32 @dynamic_alloca_i64(i64 %n) {
define amdgpu_kernel void @dynamic_alloca_i64_kernel(i64 %n, ptr %p) {
; ISEL-LABEL: dynamic_alloca_i64_kernel:
; ISEL: ; %bb.0:
-; ISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; ISEL-NEXT: s_load_dwordx4 s[20:23], s[8:9], 0x0
+; ISEL-NEXT: s_add_u32 flat_scratch_lo, s12, s17
; ISEL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; ISEL-NEXT: s_add_u32 s0, s0, s17
-; ISEL-NEXT: s_movk_i32 s32, 0x400
; ISEL-NEXT: s_addc_u32 s1, s1, 0
-; ISEL-NEXT: s_mov_b32 s13, s15
; ISEL-NEXT: s_mov_b32 s12, s14
-; ISEL-NEXT: s_mov_b64 s[14:15], src_private_base
-; ISEL-NEXT: s_cmp_lg_u32 s32, -1
-; ISEL-NEXT: s_cselect_b32 s15, s15, 0
-; ISEL-NEXT: s_cselect_b32 s17, s32, 0
; ISEL-NEXT: s_waitcnt lgkmcnt(0)
; ISEL-NEXT: s_lshl_b32 s14, s20, 2
; ISEL-NEXT: s_add_i32 s14, s14, 15
; ISEL-NEXT: s_and_b32 s14, s14, -16
+; ISEL-NEXT: s_movk_i32 s32, 0x400
; ISEL-NEXT: s_lshl_b32 s14, s14, 6
-; ISEL-NEXT: s_add_i32 s32, s32, s14
+; ISEL-NEXT: s_add_i32 s17, s32, s14
+; ISEL-NEXT: s_mov_b32 s13, s15
+; ISEL-NEXT: s_cmp_lg_u32 s32, -1
+; ISEL-NEXT: s_mov_b64 s[14:15], src_private_base
+; ISEL-NEXT: s_cselect_b32 s34, s32, 0
+; ISEL-NEXT: s_cselect_b32 s15, s15, 0
; ISEL-NEXT: s_add_u32 s8, s8, 16
; ISEL-NEXT: s_addc_u32 s9, s9, 0
; ISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
@@ -505,16 +488,16 @@ define amdgpu_kernel void @dynamic_alloca_i64_kernel(i64 %n, ptr %p) {
; ISEL-NEXT: s_addc_u32 s19, s19, bar@rel32@hi+12
; ISEL-NEXT: v_or3_b32 v31, v0, v1, v2
; ISEL-NEXT: s_mov_b32 s14, s16
-; ISEL-NEXT: v_mov_b32_e32 v0, s17
+; ISEL-NEXT: v_mov_b32_e32 v0, s34
; ISEL-NEXT: v_mov_b32_e32 v1, s15
; ISEL-NEXT: s_mov_b32 s33, 0
; ISEL-NEXT: v_mov_b32_e32 v40, s22
; ISEL-NEXT: v_mov_b32_e32 v41, s23
-; ISEL-NEXT: v_mov_b32_e32 v42, s17
-; ISEL-NEXT: v_mov_b32_e32 v43, s15
+; ISEL-NEXT: s_mov_b32 s32, s17
; ISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
-; ISEL-NEXT: flat_load_dword v0, v[42:43]
-; ISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; ISEL-NEXT: v_mov_b32_e32 v0, s34
+; ISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; ISEL-NEXT: s_waitcnt vmcnt(0)
; ISEL-NEXT: flat_store_dword v[40:41], v0
; ISEL-NEXT: s_endpgm
;
@@ -553,11 +536,10 @@ define amdgpu_kernel void @dynamic_alloca_i64_kernel(i64 %n, ptr %p) {
; GI-NEXT: s_mov_b32 s33, 0
; GI-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GI-NEXT: v_mov_b32_e32 v0, s34
-; GI-NEXT: v_mov_b32_e32 v1, s35
-; GI-NEXT: flat_load_dword v2, v[0:1]
+; GI-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen
; GI-NEXT: v_mov_b32_e32 v0, s38
; GI-NEXT: v_mov_b32_e32 v1, s39
-; GI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GI-NEXT: s_waitcnt vmcnt(0)
; GI-NEXT: flat_store_dword v[0:1], v2
; GI-NEXT: s_endpgm
%alloca = alloca i32, i64 %n, align 4
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/alloca-as0.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/alloca-as0.ll
new file mode 100644
index 0000000000000..57dcd96594893
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/alloca-as0.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s -o - | FileCheck %s
+
+declare void @bar(ptr)
+
+define i32 @static_alloca() {
+; CHECK-LABEL: define i32 @static_alloca() {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ALLOCA]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
+; CHECK-NEXT: call void @bar(ptr [[TMP2]])
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: ret i32 [[LOAD]]
+;
+ %alloca = alloca i32, align 4
+ call void @bar(ptr %alloca)
+ %load = load i32, ptr %alloca
+ ret i32 %load
+}
+
+define i32 @dynamic_alloca(i32 %n) {
+; CHECK-LABEL: define i32 @dynamic_alloca(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, i32 [[N]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ALLOCA]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
+; CHECK-NEXT: call void @bar(ptr [[TMP2]])
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[TMP1]], align 4
+; CHECK-NEXT: ret i32 0
+;
+ %alloca = alloca i32, i32 %n, align 4
+ call void @bar(ptr %alloca)
+ %load = load i32, ptr %alloca
+ ret i32 0
+}
More information about the llvm-branch-commits
mailing list