[llvm] 95a834a - (Reland) [AMDGPU] Run LowerLDS at the end of the fullLTO pipeline (#85626)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 21 03:44:51 PDT 2024
Author: Pierre van Houtryve
Date: 2024-03-21T11:44:47+01:00
New Revision: 95a834a16c3de0de615d0cfa20a6c8bd973b6a1d
URL: https://github.com/llvm/llvm-project/commit/95a834a16c3de0de615d0cfa20a6c8bd973b6a1d
DIFF: https://github.com/llvm/llvm-project/commit/95a834a16c3de0de615d0cfa20a6c8bd973b6a1d.diff
LOG: (Reland) [AMDGPU] Run LowerLDS at the end of the fullLTO pipeline (#85626)
Reland of #75333
Added:
llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2b457fe519d96c..c96625092a76c8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -793,6 +793,15 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(
PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
});
+
+ PB.registerFullLinkTimeOptimizationLastEPCallback(
+ [this](ModulePassManager &PM, OptimizationLevel Level) {
+ // We want to support the -lto-partitions=N option as "best effort".
+ // For that, we need to lower LDS earlier in the pipeline before the
+ // module is partitioned for codegen.
+ if (EnableLowerModuleLDS)
+ PM.addPass(AMDGPULowerModuleLDSPass(*this));
+ });
}
int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
diff --git a/llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll
new file mode 100644
index 00000000000000..f1d946376afe06
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll
@@ -0,0 +1,47 @@
+
+; Default O0
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
+; RUN: llvm-lto2 run -O0 -cg-opt-level 0 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
+
+; Unified O0
+; RUN: opt -unified-lto -thinlto-split-lto-unit -thinlto-bc -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
+; RUN: llvm-lto2 run -unified-lto=full -O0 -cg-opt-level 0 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
+
+; Default O1
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
+; RUN: llvm-lto2 run -O1 -cg-opt-level 1 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
+
+; Unified O1
+; RUN: opt -unified-lto -thinlto-split-lto-unit -thinlto-bc -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
+; RUN: llvm-lto2 run -unified-lto=full -O1 -cg-opt-level 1 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
+
+; Default O2
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
+; RUN: llvm-lto2 run -O2 -cg-opt-level 2 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
+
+; Unified O2
+; RUN: opt -unified-lto -thinlto-split-lto-unit -thinlto-bc -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
+; RUN: llvm-lto2 run -unified-lto=full -O2 -cg-opt-level 2 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
+
+; Default O3
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
+; RUN: llvm-lto2 run -O3 -cg-opt-level 3 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
+
+; Unified O3
+; RUN: opt -unified-lto -thinlto-split-lto-unit -thinlto-bc -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
+; RUN: llvm-lto2 run -unified-lto=full -O3 -cg-opt-level 3 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
+
+; First print will be from the New PM during the full LTO pipeline.
+; Second print will be from the legacy PM during the CG pipeline.
+
+; CHECK: Running pass: AMDGPULowerModuleLDSPass on [module]
+; CHECK: ModulePass Manager
+; CHECK: Lower uses of LDS variables from non-kernel functions
+
+ at lds = internal unnamed_addr addrspace(3) global i32 poison, align 4
+
+define amdgpu_kernel void @test() {
+entry:
+ store i32 1, ptr addrspace(3) @lds
+ ret void
+}
More information about the llvm-commits
mailing list