[PATCH] D100481: [AMDGPU] Disable forceful inline of non-kernel functions which use LDS.
Mahesha S via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 14 11:22:42 PDT 2021
hsmhsm updated this revision to Diff 337504.
hsmhsm added a comment.
Addressed the review comments from Jon.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D100481/new/
https://reviews.llvm.org/D100481
Files:
llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll
llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
Index: llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
+++ llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll
@@ -1,7 +1,7 @@
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-always-inline %s | FileCheck --check-prefix=ALL %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-always-inline %s | FileCheck --check-prefix=ALL %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -amdgpu-always-inline %s | FileCheck --check-prefix=ALL %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -passes=amdgpu-always-inline %s | FileCheck --check-prefix=ALL %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-always-inline -amdgpu-enable-lower-module-lds=false %s | FileCheck --check-prefix=ALL %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-always-inline -amdgpu-enable-lower-module-lds=false %s | FileCheck --check-prefix=ALL %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -amdgpu-always-inline -amdgpu-enable-lower-module-lds=false %s | FileCheck --check-prefix=ALL %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -passes=amdgpu-always-inline -amdgpu-enable-lower-module-lds=false %s | FileCheck --check-prefix=ALL %s
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
Index: llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll
+++ llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -amdgpu-stress-function-calls < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -amdgpu-stress-function-calls -amdgpu-enable-lower-module-lds=false < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -amdgpu-enable-lower-module-lds=false < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-lower-module-lds=false < %s | FileCheck -check-prefix=GCN %s
@lds0 = addrspace(3) global i32 undef, align 4
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -35,6 +35,7 @@
static bool EnableLateStructurizeCFG;
static bool EnableFunctionCalls;
static bool EnableFixedFunctionABI;
+ static bool EnableLowerModuleLDS;
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, TargetOptions Options,
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -193,10 +193,10 @@
cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
cl::Hidden);
-static cl::opt<bool>
- EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::Hidden,
- cl::desc("Enable lower module lds pass"),
- cl::init(true));
+static cl::opt<bool, true> EnableLowerModuleLDS(
+ "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
+ cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
+ cl::Hidden);
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
@@ -393,6 +393,7 @@
bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
bool AMDGPUTargetMachine::EnableFixedFunctionABI = false;
+bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
Index: llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -120,10 +120,10 @@
for (GlobalVariable &GV : M.globals()) {
// TODO: Region address
unsigned AS = GV.getAddressSpace();
- if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)
- continue;
-
- recursivelyVisitUsers(GV, FuncsToAlwaysInline);
+ if ((AS == AMDGPUAS::REGION_ADDRESS) ||
+ (AS == AMDGPUAS::LOCAL_ADDRESS &&
+ !AMDGPUTargetMachine::EnableLowerModuleLDS))
+ recursivelyVisitUsers(GV, FuncsToAlwaysInline);
}
if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D100481.337504.patch
Type: text/x-patch
Size: 5220 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210414/124517c5/attachment.bin>
More information about the llvm-commits mailing list