[PATCH] D117494: AMDGPU: Account for usage of HIP-style dynamic LDS
Siu Chi Chan via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 17 08:15:11 PST 2022
scchan created this revision.
Herald added subscribers: foad, kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm.
scchan requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.
Disable promotion of allocas to LDS when HIP-style dynamic LDS is used, since
its size is unknown at compile time.
Change-Id: I13244a767cf172f63d2261bf6dd78ee8d2c21c44
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D117494
Files:
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-constantexpr-use.ll
Index: llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-constantexpr-use.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-constantexpr-use.ll
+++ llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-constantexpr-use.ll
@@ -5,6 +5,7 @@
@all_lds = internal unnamed_addr addrspace(3) global [16384 x i32] undef, align 4
@some_lds = internal unnamed_addr addrspace(3) global [32 x i32] undef, align 4
+@some_dynamic_lds = external hidden addrspace(3) global [0 x i32], align 4
@initializer_user_some = addrspace(1) global i32 ptrtoint ([32 x i32] addrspace(3)* @some_lds to i32), align 4
@initializer_user_all = addrspace(1) global i32 ptrtoint ([16384 x i32] addrspace(3)* @all_lds to i32), align 4
@@ -62,6 +63,33 @@
ret void
}
+; Has a constant expression use through a single level of constant
+; expression, but usage of dynamic LDS should block promotion
+
+; IR-LABEL: @constant_expression_uses_some_dynamic_lds(
+; IR: alloca
+
+; ASM-LABEL: {{^}}constant_expression_uses_some_dynamic_lds:
+; ASM: .amdhsa_group_segment_fixed_size 0{{$}}
+define amdgpu_kernel void @constant_expression_uses_some_dynamic_lds(i32 addrspace(1)* nocapture %out, i32 %idx) #0 {
+entry:
+ %stack = alloca [4 x i32], align 4, addrspace(5)
+ %gep0 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %gep1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %gep2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 2
+ %gep3 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 3
+ store i32 9, i32 addrspace(5)* %gep0
+ store i32 10, i32 addrspace(5)* %gep1
+ store i32 99, i32 addrspace(5)* %gep2
+ store i32 43, i32 addrspace(5)* %gep3
+ %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 %idx
+ %load = load i32, i32 addrspace(5)* %arrayidx, align 4
+ store i32 %load, i32 addrspace(1)* %out
+ %gep_dyn_lds = getelementptr inbounds [0 x i32], [0 x i32]* addrspacecast ([0 x i32] addrspace(3)* @some_dynamic_lds to [0 x i32]*), i64 0, i64 0
+ store i32 1234, i32* %gep_dyn_lds, align 4
+ ret void
+}
+
declare void @callee(i8*)
; IR-LABEL: @constant_expression_uses_all_lds_multi_level(
@@ -111,6 +139,29 @@
ret void
}
+; IR-LABEL: @constant_expression_uses_some_dynamic_lds_multi_level(
+; IR: alloca
+
+; ASM-LABEL: {{^}}constant_expression_uses_some_dynamic_lds_multi_level:
+; ASM: .amdhsa_group_segment_fixed_size 0{{$}}
+define amdgpu_kernel void @constant_expression_uses_some_dynamic_lds_multi_level(i32 addrspace(1)* nocapture %out, i32 %idx) #0 {
+entry:
+ %stack = alloca [4 x i32], align 4, addrspace(5)
+ %gep0 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 0
+ %gep1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 1
+ %gep2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 2
+ %gep3 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 3
+ store i32 9, i32 addrspace(5)* %gep0
+ store i32 10, i32 addrspace(5)* %gep1
+ store i32 99, i32 addrspace(5)* %gep2
+ store i32 43, i32 addrspace(5)* %gep3
+ %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 %idx
+ %load = load i32, i32 addrspace(5)* %arrayidx, align 4
+ store i32 %load, i32 addrspace(1)* %out
+ call void @callee(i8* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* getelementptr inbounds ([0 x i32], [0 x i32] addrspace(3)* @some_dynamic_lds, i32 0, i32 0) to i8 addrspace(3)*) to i8*))
+ ret void
+}
+
; IR-LABEL: @constant_expression_uses_some_lds_global_initializer(
; IR-NOT: alloca
; IR: llvm.amdgcn.workitem.id
Index: llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -781,6 +781,18 @@
}
}
+ // HIP uses an extern unsized array in local address space for dynamically
+ // allocated shared memory. In that case, we have to disable the promotion.
+ for (const GlobalVariable *GV : UsedLDS) {
+ if (GV->hasExternalLinkage()) {
+ LocalMemLimit = 0;
+ LLVM_DEBUG(dbgs() << "Function has a reference to externally allocated "
+ "local memory. Promoting to local memory "
+ "disabled.\n");
+ return false;
+ }
+ }
+
const DataLayout &DL = Mod->getDataLayout();
SmallVector<std::pair<uint64_t, Align>, 16> AllocatedSizes;
AllocatedSizes.reserve(UsedLDS.size());
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D117494.400560.patch
Type: text/x-patch
Size: 4730 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220117/e9d82e94/attachment.bin>
More information about the llvm-commits
mailing list