[llvm] 4973b0c - [AMDGPU] Disable forceful inline of non-kernel functions which use LDS.

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 14 20:44:04 PDT 2021


Author: hsmahesha
Date: 2021-04-15T09:12:56+05:30
New Revision: 4973b0c4e774e18452f2a7da23af4dec7d368bfa

URL: https://github.com/llvm/llvm-project/commit/4973b0c4e774e18452f2a7da23af4dec7d368bfa
DIFF: https://github.com/llvm/llvm-project/commit/4973b0c4e774e18452f2a7da23af4dec7d368bfa.diff

LOG: [AMDGPU] Disable forceful inline of non-kernel functions which use LDS.

Now since LDS uses within non-kernel functions are being handled in the
pass - LowerModuleLDS, we *NO* need to *forcefully* inline non-kernel
functions just because they use LDS. Do forceful inlining only when the
pass - LowerModuleLDS is not enabled. It is enabled by default.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D100481

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
    llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
    llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll
    llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 51af250509509..2af9fc9558753 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -120,10 +120,10 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
   for (GlobalVariable &GV : M.globals()) {
     // TODO: Region address
     unsigned AS = GV.getAddressSpace();
-    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
-      continue;
-
-    recursivelyVisitUsers(GV, FuncsToAlwaysInline);
+    if ((AS == AMDGPUAS::REGION_ADDRESS) ||
+        (AS == AMDGPUAS::LOCAL_ADDRESS &&
+         !AMDGPUTargetMachine::EnableLowerModuleLDS))
+      recursivelyVisitUsers(GV, FuncsToAlwaysInline);
   }
 
   if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 903f3eaebe04d..ce39609da303d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -193,10 +193,10 @@ static cl::opt<bool> EnableStructurizerWorkarounds(
     cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
     cl::Hidden);
 
-static cl::opt<bool>
-    EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::Hidden,
-                         cl::desc("Enable lower module lds pass"),
-                         cl::init(true));
+static cl::opt<bool, true> EnableLowerModuleLDS(
+    "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
+    cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
+    cl::Hidden);
 
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   // Register the target
@@ -393,6 +393,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
 bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
 bool AMDGPUTargetMachine::EnableFunctionCalls = false;
 bool AMDGPUTargetMachine::EnableFixedFunctionABI = false;
+bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
 
 AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 95aefa23c24ca..cab02b7555c6e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -35,6 +35,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
   static bool EnableLateStructurizeCFG;
   static bool EnableFunctionCalls;
   static bool EnableFixedFunctionABI;
+  static bool EnableLowerModuleLDS;
 
   AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
                       StringRef FS, TargetOptions Options,

diff  --git a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll
index 6d90cf95fde58..dc400e06cb128 100644
--- a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll
+++ b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -amdgpu-stress-function-calls < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -amdgpu-stress-function-calls -amdgpu-enable-lower-module-lds=false < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -amdgpu-enable-lower-module-lds=false < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-lower-module-lds=false < %s | FileCheck -check-prefix=GCN %s
 
 @lds0 = addrspace(3) global i32 undef, align 4
 

diff  --git a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
index 8e423ef3a786c..99b334691538c 100644
--- a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
+++ b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
@@ -1,7 +1,7 @@
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-always-inline %s | FileCheck --check-prefix=ALL %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-always-inline %s | FileCheck --check-prefix=ALL %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -amdgpu-always-inline %s | FileCheck --check-prefix=ALL %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -passes=amdgpu-always-inline %s | FileCheck --check-prefix=ALL %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-always-inline -amdgpu-enable-lower-module-lds=false %s | FileCheck --check-prefix=ALL %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-always-inline -amdgpu-enable-lower-module-lds=false %s | FileCheck --check-prefix=ALL %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -amdgpu-always-inline -amdgpu-enable-lower-module-lds=false %s | FileCheck --check-prefix=ALL %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -passes=amdgpu-always-inline -amdgpu-enable-lower-module-lds=false %s | FileCheck --check-prefix=ALL %s
 
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
 


        


More information about the llvm-commits mailing list