[llvm] [NVPTX][NVPTXLowerArgs] Add NewPM interface for NVPTXLowerArgs (PR #128960)
Akshay Deodhar via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 27 11:54:52 PST 2025
https://github.com/akshayrdeodhar updated https://github.com/llvm/llvm-project/pull/128960
From 3d90f7af46d488932a2b9c44302864a725841d7c Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Wed, 26 Feb 2025 22:46:24 +0000
Subject: [PATCH 1/4] [NVPTX][NVPTXLowerArgs] Add NewPM interface for
NVPTXLowerArgs
---
llvm/lib/Target/NVPTX/NVPTX.h | 10 +++++++
llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 38 +++++++++++-------------
2 files changed, 28 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index ca915cd3f3732..44be26b12f0c4 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -18,6 +18,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
class FunctionPass;
@@ -74,6 +75,15 @@ struct NVPTXCopyByValArgsPass : PassInfoMixin<NVPTXCopyByValArgsPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
+struct NVPTXLowerArgsPass : PassInfoMixin<NVPTXLowerArgsPass> {
+ private:
+ TargetMachine &TM;
+
+ public:
+ NVPTXLowerArgsPass(TargetMachine& TM) : TM(TM) {};
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
namespace NVPTX {
enum DrvInterface {
NVCL,
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index c763b54c8dbfe..0a65a0df8af3c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -168,17 +168,6 @@ namespace {
class NVPTXLowerArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
- bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F);
- bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F);
-
- // handle byval parameters
- void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg);
- // Knowing Ptr must point to the global address space, this function
- // addrspacecasts Ptr to global and then back to generic. This allows
- // NVPTXInferAddressSpaces to fold the global-to-generic cast into
- // loads/stores that appear later.
- void markPointerAsGlobal(Value *Ptr);
-
public:
static char ID; // Pass identification, replacement for typeid
NVPTXLowerArgs() : FunctionPass(ID) {}
@@ -571,8 +560,7 @@ void copyByValParam(Function &F, Argument &Arg) {
}
} // namespace
-void NVPTXLowerArgs::handleByValParam(const NVPTXTargetMachine &TM,
- Argument *Arg) {
+static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
Function *Func = Arg->getParent();
bool HasCvtaParam =
TM.getSubtargetImpl(*Func)->hasCvtaParam() && isKernelFunction(*Func);
@@ -641,7 +629,11 @@ void NVPTXLowerArgs::handleByValParam(const NVPTXTargetMachine &TM,
copyByValParam(*Func, *Arg);
}
-void NVPTXLowerArgs::markPointerAsGlobal(Value *Ptr) {
+// Knowing Ptr must point to the global address space, this function
+// addrspacecasts Ptr to global and then back to generic. This allows
+// NVPTXInferAddressSpaces to fold the global-to-generic cast into
+// loads/stores that appear later.
+static void markPointerAsGlobal(Value *Ptr) {
if (Ptr->getType()->getPointerAddressSpace() != ADDRESS_SPACE_GENERIC)
return;
@@ -670,13 +662,12 @@ void NVPTXLowerArgs::markPointerAsGlobal(Value *Ptr) {
// =============================================================================
// Main function for this pass.
// =============================================================================
-bool NVPTXLowerArgs::runOnKernelFunction(const NVPTXTargetMachine &TM,
- Function &F) {
+static bool runOnKernelFunction(const NVPTXTargetMachine &TM,Function &F) {
// Copying of byval aggregates + SROA may result in pointers being loaded as
// integers, followed by intotoptr. We may want to mark those as global, too,
// but only if the loaded integer is used exclusively for conversion to a
// pointer with inttoptr.
- auto HandleIntToPtr = [this](Value &V) {
+ auto HandleIntToPtr = [](Value &V) {
if (llvm::all_of(V.users(), [](User *U) { return isa<IntToPtrInst>(U); })) {
SmallVector<User *, 16> UsersToUpdate(V.users());
for (User *U : UsersToUpdate)
@@ -721,8 +712,7 @@ bool NVPTXLowerArgs::runOnKernelFunction(const NVPTXTargetMachine &TM,
}
// Device functions only need to copy byval args into local memory.
-bool NVPTXLowerArgs::runOnDeviceFunction(const NVPTXTargetMachine &TM,
- Function &F) {
+static bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F) {
LLVM_DEBUG(dbgs() << "Lowering function args of " << F.getName() << "\n");
for (Argument &Arg : F.args())
if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
@@ -736,7 +726,6 @@ bool NVPTXLowerArgs::runOnFunction(Function &F) {
return isKernelFunction(F) ? runOnKernelFunction(TM, F)
: runOnDeviceFunction(TM, F);
}
-
FunctionPass *llvm::createNVPTXLowerArgsPass() { return new NVPTXLowerArgs(); }
static bool copyFunctionByValArgs(Function &F) {
@@ -757,3 +746,12 @@ PreservedAnalyses NVPTXCopyByValArgsPass::run(Function &F,
return copyFunctionByValArgs(F) ? PreservedAnalyses::none()
: PreservedAnalyses::all();
}
+
+PreservedAnalyses NVPTXLowerArgsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &NTM = static_cast<NVPTXTargetMachine &>(TM);
+ bool Changed = isKernelFunction(F) ? runOnKernelFunction(NTM, F)
+ : runOnDeviceFunction(NTM, F);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+
+}
\ No newline at end of file
From 2e4665f0aecf5aa2411eef27805a058b75d5d9f1 Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Wed, 26 Feb 2025 23:23:56 +0000
Subject: [PATCH 2/4] clang-format
---
llvm/lib/Target/NVPTX/NVPTX.h | 10 +++++-----
llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 3 +--
2 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 44be26b12f0c4..92d3effb45cda 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -76,12 +76,12 @@ struct NVPTXCopyByValArgsPass : PassInfoMixin<NVPTXCopyByValArgsPass> {
};
struct NVPTXLowerArgsPass : PassInfoMixin<NVPTXLowerArgsPass> {
- private:
- TargetMachine &TM;
+private:
+ TargetMachine &TM;
- public:
- NVPTXLowerArgsPass(TargetMachine& TM) : TM(TM) {};
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+public:
+ NVPTXLowerArgsPass(TargetMachine &TM) : TM(TM) {};
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
namespace NVPTX {
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 0a65a0df8af3c..7e16af2b0b050 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -662,7 +662,7 @@ static void markPointerAsGlobal(Value *Ptr) {
// =============================================================================
// Main function for this pass.
// =============================================================================
-static bool runOnKernelFunction(const NVPTXTargetMachine &TM,Function &F) {
+static bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F) {
// Copying of byval aggregates + SROA may result in pointers being loaded as
// integers, followed by intotoptr. We may want to mark those as global, too,
// but only if the loaded integer is used exclusively for conversion to a
@@ -753,5 +753,4 @@ PreservedAnalyses NVPTXLowerArgsPass::run(Function &F,
bool Changed = isKernelFunction(F) ? runOnKernelFunction(NTM, F)
: runOnDeviceFunction(NTM, F);
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
-
}
\ No newline at end of file
From c5c88bd5c084c4e35d230c0ffd498bbf7c411f4f Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Wed, 26 Feb 2025 15:43:38 -0800
Subject: [PATCH 3/4] add newline at end of file
---
llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 7e16af2b0b050..02657bdb11504 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -753,4 +753,4 @@ PreservedAnalyses NVPTXLowerArgsPass::run(Function &F,
bool Changed = isKernelFunction(F) ? runOnKernelFunction(NTM, F)
: runOnDeviceFunction(NTM, F);
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
-}
\ No newline at end of file
+}
From dde7018bcd29ae6b28796c2e1ce29d3b918f5366 Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Thu, 27 Feb 2025 19:54:27 +0000
Subject: [PATCH 4/4] address review comments
---
llvm/lib/Target/NVPTX/NVPTX.h | 4 ++--
llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 2 +-
llvm/lib/Target/NVPTX/NVPTXPassRegistry.def | 1 +
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 12 ++++++++++++
llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 2 ++
5 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 92d3effb45cda..b2005ca7a83a4 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -77,10 +77,10 @@ struct NVPTXCopyByValArgsPass : PassInfoMixin<NVPTXCopyByValArgsPass> {
struct NVPTXLowerArgsPass : PassInfoMixin<NVPTXLowerArgsPass> {
private:
- TargetMachine &TM;
+ const TargetMachine &TM;
public:
- NVPTXLowerArgsPass(TargetMachine &TM) : TM(TM) {};
+ NVPTXLowerArgsPass(const TargetMachine &TM) : TM(TM) {};
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 02657bdb11504..5960682937d39 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -749,7 +749,7 @@ PreservedAnalyses NVPTXCopyByValArgsPass::run(Function &F,
PreservedAnalyses NVPTXLowerArgsPass::run(Function &F,
FunctionAnalysisManager &AM) {
- auto &NTM = static_cast<NVPTXTargetMachine &>(TM);
+ auto &NTM = static_cast<const NVPTXTargetMachine &>(TM);
bool Changed = isKernelFunction(F) ? runOnKernelFunction(NTM, F)
: runOnDeviceFunction(NTM, F);
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
diff --git a/llvm/lib/Target/NVPTX/NVPTXPassRegistry.def b/llvm/lib/Target/NVPTX/NVPTXPassRegistry.def
index 28ea9dd9c0227..34c79b8f77bae 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPassRegistry.def
+++ b/llvm/lib/Target/NVPTX/NVPTXPassRegistry.def
@@ -38,4 +38,5 @@ FUNCTION_ALIAS_ANALYSIS("nvptx-aa", NVPTXAA())
FUNCTION_PASS("nvvm-intr-range", NVVMIntrRangePass())
FUNCTION_PASS("nvvm-reflect", NVVMReflectPass())
FUNCTION_PASS("nvptx-copy-byval-args", NVPTXCopyByValArgsPass())
+FUNCTION_PASS("nvptx-lower-args", NVPTXLowerArgsPass(*this));
#undef FUNCTION_PASS
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index f2afa6fc20bfa..53fbeacbc09e7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -21,6 +21,7 @@
#include "NVPTXTargetObjectFile.h"
#include "NVPTXTargetTransformInfo.h"
#include "TargetInfo/NVPTXTargetInfo.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/KernelInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -271,6 +272,17 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
});
+ PB.registerPipelineParsingCallback(
+ [this](StringRef PassName, FunctionPassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+ if (PassName == "nvptx-lower-args") {
+ PM.addPass(NVPTXLowerArgsPass(*this));
+ return true;
+ }
+ return false;
+ }
+ );
+
if (!NoKernelInfoEndLTO) {
PB.registerFullLinkTimeOptimizationLastEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index a09fb35d2d546..6805facf96866 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -2,6 +2,8 @@
; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_60 -mattr=ptx77 -nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_60
; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_70
; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -passes=nvptx-copy-byval-args -S | FileCheck %s --check-prefixes=COMMON,COPY
+; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_60 -mattr=ptx77 -passes=nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_60
+; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -passes=nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_70
source_filename = "<stdin>"
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
More information about the llvm-commits mailing list