[clang] [llvm] [HIP][HIPSTDPAR][NFC] Re-order & adapt `hipstdpar` specific passes (PR #134753)
Alex Voicu via cfe-commits
cfe-commits at lists.llvm.org
Sun Apr 13 06:11:12 PDT 2025
https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/134753
>From a988ecf63dc79d226c2f7aa1430f65d08256888b Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 8 Apr 2025 00:20:27 +0100
Subject: [PATCH 1/7] Re-order & adapt `hipstdpar` specific passes.
---
clang/lib/CodeGen/BackendUtil.cpp | 8 ++++----
clang/lib/Driver/ToolChains/HIPAMD.cpp | 7 ++++---
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 20 ++++++++++++-------
3 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 7557cb8408921..fa5e12d4033a5 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1115,6 +1115,10 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (CodeGenOpts.LinkBitcodePostopt)
MPM.addPass(LinkInModulesPass(BC));
+ if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice &&
+ LangOpts.HIPStdParInterposeAlloc)
+ MPM.addPass(HipStdParAllocationInterpositionPass());
+
// Add a verifier pass if requested. We don't have to do this if the action
// requires code generation because there will already be a verifier pass in
// the code-generation pipeline.
@@ -1178,10 +1182,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
return;
}
- if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice &&
- LangOpts.HIPStdParInterposeAlloc)
- MPM.addPass(HipStdParAllocationInterpositionPass());
-
// Now that we have all of the passes ready, run them.
{
PrettyStackTraceString CrashInfo("Optimizer");
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index abb83701759ce..52e35a01be58d 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -231,10 +231,11 @@ void HIPAMDToolChain::addClangTargetOptions(
CC1Args.append({"-fcuda-is-device", "-fno-threadsafe-statics"});
if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
- false))
+ false)) {
CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
- if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar))
- CC1Args.append({"-mllvm", "-amdgpu-enable-hipstdpar"});
+ if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar))
+ CC1Args.append({"-mllvm", "-amdgpu-enable-hipstdpar"});
+ }
StringRef MaxThreadsPerBlock =
DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4b5c70f09155f..03b1693244879 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -802,17 +802,17 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
#define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"
#include "llvm/Passes/TargetPassRegistry.inc"
- PB.registerPipelineStartEPCallback(
- [](ModulePassManager &PM, OptimizationLevel Level) {
- if (EnableHipStdPar)
- PM.addPass(HipStdParAcceleratorCodeSelectionPass());
- });
-
PB.registerPipelineEarlySimplificationEPCallback(
[](ModulePassManager &PM, OptimizationLevel Level,
ThinOrFullLTOPhase Phase) {
- if (!isLTOPreLink(Phase))
+ if (!isLTOPreLink(Phase)) {
+ // When we are not using -fgpu-rdc, we can run accelerator code
+ // selection relatively early, but still after linking to prevent
+ // eager removal of potentially reachable symbols.
+ if (EnableHipStdPar)
+ PM.addPass(HipStdParAcceleratorCodeSelectionPass());
PM.addPass(AMDGPUPrintfRuntimeBindingPass());
+ }
if (Level == OptimizationLevel::O0)
return;
@@ -883,6 +883,12 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerFullLinkTimeOptimizationLastEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
+ // When we are using -fgpu-rdc, we can onky run accelerator code
+ // selection after linking to prevent, otherwise we end up removing
+ // potentially reachable symbols that were exported as external in other
+ // modules.
+ if (EnableHipStdPar)
+ PM.addPass(HipStdParAcceleratorCodeSelectionPass());
// We want to support the -lto-partitions=N option as "best effort".
// For that, we need to lower LDS earlier in the pipeline before the
// module is partitioned for codegen.
>From 5cd1abb217d7fb2dd1f33c94a4f285b9aacd8dde Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 8 Apr 2025 00:27:18 +0100
Subject: [PATCH 2/7] Fix formatting.
---
clang/lib/CodeGen/BackendUtil.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index fa5e12d4033a5..f7eb853beb23c 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1117,7 +1117,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice &&
LangOpts.HIPStdParInterposeAlloc)
- MPM.addPass(HipStdParAllocationInterpositionPass());
+ MPM.addPass(HipStdParAllocationInterpositionPass());
// Add a verifier pass if requested. We don't have to do this if the action
// requires code generation because there will already be a verifier pass in
>From 2765739128a30b1dc94a8e8d8ab76a6f91c88e6a Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 8 Apr 2025 18:54:28 +0100
Subject: [PATCH 3/7] Add tests.
---
.../rdc-does-not-enable-hipstdpar.cpp | 11 ++++++++++
.../select-accelerator-code-pass-ordering.cpp | 21 +++++++++++++++++++
2 files changed, 32 insertions(+)
create mode 100644 clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
create mode 100644 clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp
diff --git a/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp b/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
new file mode 100644
index 0000000000000..c737fc9a42423
--- /dev/null
+++ b/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
@@ -0,0 +1,11 @@
+// Check that if we are compiling with fgpu-rdc amdgpu-enable-hipstdpar is not
+// passed to CC1, to avoid eager, per TU, removal of potentially accessible
+// functions.
+
+// RUN: %clang -### --hipstdpar --offload-arch=gfx906 %s -nogpulib -nogpuinc \
+// RUN: 2>&1 | FileCheck -check-prefix=NORDC %s
+// NORDC: {{".*clang.*".* "-triple" "amdgcn-amd-amdhsa".* "-mllvm" "-amdgpu-enable-hipstdpar".*}}
+
+// RUN: %clang -### --hipstdpar --offload-arch=gfx906 %s -nogpulib -nogpuinc -fgpu-rdc \
+// RUN: 2>&1 | FileCheck -check-prefix=RDC %s
+// RDC-NOT: {{"-mllvm" "-amdgpu-enable-hipstdpar".*}}
\ No newline at end of file
diff --git a/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp b/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp
new file mode 100644
index 0000000000000..81aedc9cbcf03
--- /dev/null
+++ b/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp
@@ -0,0 +1,21 @@
+// Test that the accelerator code selection pass only gets invoked after linking
+
+// Ensure Pass HipStdParAcceleratorCodeSelectionPass is not invoked in PreLink.
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -mllvm -amdgpu-enable-hipstdpar -flto -emit-llvm-bc -fcuda-is-device -fdebug-pass-manager \
+// RUN: %s -o - 2>&1 | FileCheck --check-prefix=HIPSTDPAR-PRE %s
+// HIPSTDPAR-PRE-NOT: Running pass: HipStdParAcceleratorCodeSelectionPass
+
+// Ensure Pass HipStdParAcceleratorCodeSelectionPass is invoked in PostLink.
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -mllvm -amdgpu-enable-hipstdpar -fcuda-is-device -fdebug-pass-manager -emit-llvm \
+// RUN: %s -o - 2>&1 | FileCheck --check-prefix=HIPSTDPAR-POST %s
+// HIPSTDPAR-POST: Running pass: HipStdParAcceleratorCodeSelection
+
+#define __device__ __attribute__((device))
+
+void foo(float *a, float b) {
+ *a = b;
+}
+
+__device__ void bar(float *a, float b) {
+ *a = b;
+}
>From 9df1e62007e40c91227945b24da7c634d801c587 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 11 Apr 2025 23:35:45 +0100
Subject: [PATCH 4/7] Fix typos.
---
clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp | 2 +-
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp b/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
index c737fc9a42423..da1c3943553f0 100644
--- a/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
+++ b/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
@@ -8,4 +8,4 @@
// RUN: %clang -### --hipstdpar --offload-arch=gfx906 %s -nogpulib -nogpuinc -fgpu-rdc \
// RUN: 2>&1 | FileCheck -check-prefix=RDC %s
-// RDC-NOT: {{"-mllvm" "-amdgpu-enable-hipstdpar".*}}
\ No newline at end of file
+// RDC-NOT: {{"-mllvm" "-amdgpu-enable-hipstdpar".*}}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dce6e594873aa..5fec974c5f9a7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -886,7 +886,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerFullLinkTimeOptimizationLastEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
- // When we are using -fgpu-rdc, we can onky run accelerator code
+ // When we are using -fgpu-rdc, we can only run accelerator code
// selection after linking to prevent, otherwise we end up removing
// potentially reachable symbols that were exported as external in other
// modules.
>From 845807ee2cc885963b7583df96f93959292c95e8 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Sat, 12 Apr 2025 00:00:01 +0100
Subject: [PATCH 5/7] Update test.
---
clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp b/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
index da1c3943553f0..31bf20e9c2eb3 100644
--- a/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
+++ b/clang/test/CodeGenHipStdPar/rdc-does-not-enable-hipstdpar.cpp
@@ -4,8 +4,8 @@
// RUN: %clang -### --hipstdpar --offload-arch=gfx906 %s -nogpulib -nogpuinc \
// RUN: 2>&1 | FileCheck -check-prefix=NORDC %s
-// NORDC: {{".*clang.*".* "-triple" "amdgcn-amd-amdhsa".* "-mllvm" "-amdgpu-enable-hipstdpar".*}}
+// NORDC: {{.*}}"-mllvm" "-amdgpu-enable-hipstdpar"
// RUN: %clang -### --hipstdpar --offload-arch=gfx906 %s -nogpulib -nogpuinc -fgpu-rdc \
// RUN: 2>&1 | FileCheck -check-prefix=RDC %s
-// RDC-NOT: {{"-mllvm" "-amdgpu-enable-hipstdpar".*}}
+// RDC-NOT: {{.*}}"-mllvm" "-amdgpu-enable-hipstdpar"
>From 0af199af01c349ad909d6cda932efc11221ad6ef Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Sat, 12 Apr 2025 15:37:24 +0100
Subject: [PATCH 6/7] Apply suggestions.
---
.../select-accelerator-code-pass-ordering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp b/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp
index 81aedc9cbcf03..c51234af78904 100644
--- a/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp
+++ b/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp
@@ -2,12 +2,12 @@
// Ensure Pass HipStdParAcceleratorCodeSelectionPass is not invoked in PreLink.
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -mllvm -amdgpu-enable-hipstdpar -flto -emit-llvm-bc -fcuda-is-device -fdebug-pass-manager \
-// RUN: %s -o - 2>&1 | FileCheck --check-prefix=HIPSTDPAR-PRE %s
+// RUN: %s -o /dev/null 2>&1 | FileCheck --check-prefix=HIPSTDPAR-PRE %s
// HIPSTDPAR-PRE-NOT: Running pass: HipStdParAcceleratorCodeSelectionPass
// Ensure Pass HipStdParAcceleratorCodeSelectionPass is invoked in PostLink.
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -mllvm -amdgpu-enable-hipstdpar -fcuda-is-device -fdebug-pass-manager -emit-llvm \
-// RUN: %s -o - 2>&1 | FileCheck --check-prefix=HIPSTDPAR-POST %s
+// RUN: %s -o /dev/null 2>&1 | FileCheck --check-prefix=HIPSTDPAR-POST %s
// HIPSTDPAR-POST: Running pass: HipStdParAcceleratorCodeSelection
#define __device__ __attribute__((device))
>From 202d6b9a4cd017a829396bbc0bc563d5386990a9 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Sun, 13 Apr 2025 13:24:22 +0100
Subject: [PATCH 7/7] Use -NEXT checks.
---
.../CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp | 3 +++
1 file changed, 3 insertions(+)
diff --git a/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp b/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp
index c51234af78904..c70b651397527 100644
--- a/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp
+++ b/clang/test/CodeGenHipStdPar/select-accelerator-code-pass-ordering.cpp
@@ -3,7 +3,10 @@
// Ensure Pass HipStdParAcceleratorCodeSelectionPass is not invoked in PreLink.
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -mllvm -amdgpu-enable-hipstdpar -flto -emit-llvm-bc -fcuda-is-device -fdebug-pass-manager \
// RUN: %s -o /dev/null 2>&1 | FileCheck --check-prefix=HIPSTDPAR-PRE %s
+// HIPSTDPAR-PRE: Running pass: EntryExitInstrumenterPass
+// HIPSTDPAR-PRE-NEXT: Running pass: EntryExitInstrumenterPass
// HIPSTDPAR-PRE-NOT: Running pass: HipStdParAcceleratorCodeSelectionPass
+// HIPSTDPAR-PRE-NEXT: Running pass: AlwaysInlinerPass
// Ensure Pass HipStdParAcceleratorCodeSelectionPass is invoked in PostLink.
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -mllvm -amdgpu-enable-hipstdpar -fcuda-is-device -fdebug-pass-manager -emit-llvm \
More information about the cfe-commits
mailing list