[llvm] 106959a - [AMDGPU] Inline non-kernel functions using extern lds

Thu Sep 16 10:59:30 PDT 2021

Author: Vang Thao
Date: 2021-09-16T10:58:51-07:00
New Revision: 106959acc15c4b98e252af0c43406aa9342a2e05

URL: https://github.com/llvm/llvm-project/commit/106959acc15c4b98e252af0c43406aa9342a2e05
DIFF: https://github.com/llvm/llvm-project/commit/106959acc15c4b98e252af0c43406aa9342a2e05.diff

LOG: [AMDGPU] Inline non-kernel functions using extern lds

In https://reviews.llvm.org/D100481, forced inlining of all non-kernel
functions using lds was disabled since the AMDGPULowerModuleLDS pass now
handles static lds. However, that pass does not handle extern lds, so
non-kernel functions using extern lds must still be inlined.

Reviewed By: hsmhsm, arsenm

Differential Revision: https://reviews.llvm.org/D109773

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
    llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 2af9fc9558753..ead8f90a490ba 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -122,7 +122,7 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
     unsigned AS = GV.getAddressSpace();
     if ((AS == AMDGPUAS::REGION_ADDRESS) ||
         (AS == AMDGPUAS::LOCAL_ADDRESS &&
-         !AMDGPUTargetMachine::EnableLowerModuleLDS))
+         (!AMDGPUTargetMachine::EnableLowerModuleLDS || !GV.hasInitializer())))
       recursivelyVisitUsers(GV, FuncsToAlwaysInline);
   }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll b/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll
index 881dfaba1f5c7..15bca082fffc8 100644
--- a/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll
+++ b/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll
@@ -135,4 +135,25 @@ define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) {
   ret void
 }
 
+; CHECK-LABEL: dynamic_shared_array_with_call:
+; CHECK-NOT: s_swappc_b64
+define amdgpu_kernel void @dynamic_shared_array_with_call(float addrspace(1)* nocapture readnone %out) local_unnamed_addr {
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %1 = sext i32 %tid.x to i64
+  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i64 0, i64 %1
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
+  tail call void @store_value(float %val0)
+  ret void
+}
+
+; CHECK-NOT: store_value
+define linkonce_odr hidden void @store_value(float %val1) local_unnamed_addr {
+entry:
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %0 = sext i32 %tid.x to i64
+  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i64 0, i64 %0
+  store float %val1, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x()