[llvm] [NVPTX][NVPTXLowerArgs] Add NewPM interface for NVPTXLowerArgs (PR #128960)

Akshay Deodhar via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 28 13:09:41 PST 2025


https://github.com/akshayrdeodhar updated https://github.com/llvm/llvm-project/pull/128960

>From 3d90f7af46d488932a2b9c44302864a725841d7c Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Wed, 26 Feb 2025 22:46:24 +0000
Subject: [PATCH 01/11] [NVPTX][NVPTXLowerArgs] Add NewPM interface for
 NVPTXLowerArgs

---
 llvm/lib/Target/NVPTX/NVPTX.h            | 10 +++++++
 llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 38 +++++++++++-------------
 2 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index ca915cd3f3732..44be26b12f0c4 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -18,6 +18,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
 class FunctionPass;
@@ -74,6 +75,15 @@ struct NVPTXCopyByValArgsPass : PassInfoMixin<NVPTXCopyByValArgsPass> {
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
+struct NVPTXLowerArgsPass : PassInfoMixin<NVPTXLowerArgsPass> {
+  private:
+    TargetMachine &TM;
+
+  public:
+    NVPTXLowerArgsPass(TargetMachine& TM) : TM(TM) {};
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
 namespace NVPTX {
 enum DrvInterface {
   NVCL,
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index c763b54c8dbfe..0a65a0df8af3c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -168,17 +168,6 @@ namespace {
 class NVPTXLowerArgs : public FunctionPass {
   bool runOnFunction(Function &F) override;
 
-  bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F);
-  bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F);
-
-  // handle byval parameters
-  void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg);
-  // Knowing Ptr must point to the global address space, this function
-  // addrspacecasts Ptr to global and then back to generic. This allows
-  // NVPTXInferAddressSpaces to fold the global-to-generic cast into
-  // loads/stores that appear later.
-  void markPointerAsGlobal(Value *Ptr);
-
 public:
   static char ID; // Pass identification, replacement for typeid
   NVPTXLowerArgs() : FunctionPass(ID) {}
@@ -571,8 +560,7 @@ void copyByValParam(Function &F, Argument &Arg) {
 }
 } // namespace
 
-void NVPTXLowerArgs::handleByValParam(const NVPTXTargetMachine &TM,
-                                      Argument *Arg) {
+static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
   Function *Func = Arg->getParent();
   bool HasCvtaParam =
       TM.getSubtargetImpl(*Func)->hasCvtaParam() && isKernelFunction(*Func);
@@ -641,7 +629,11 @@ void NVPTXLowerArgs::handleByValParam(const NVPTXTargetMachine &TM,
     copyByValParam(*Func, *Arg);
 }
 
-void NVPTXLowerArgs::markPointerAsGlobal(Value *Ptr) {
+// Knowing Ptr must point to the global address space, this function
+// addrspacecasts Ptr to global and then back to generic. This allows
+// NVPTXInferAddressSpaces to fold the global-to-generic cast into
+// loads/stores that appear later.
+static void markPointerAsGlobal(Value *Ptr) {
   if (Ptr->getType()->getPointerAddressSpace() != ADDRESS_SPACE_GENERIC)
     return;
 
@@ -670,13 +662,12 @@ void NVPTXLowerArgs::markPointerAsGlobal(Value *Ptr) {
 // =============================================================================
 // Main function for this pass.
 // =============================================================================
-bool NVPTXLowerArgs::runOnKernelFunction(const NVPTXTargetMachine &TM,
-                                         Function &F) {
+static bool runOnKernelFunction(const NVPTXTargetMachine &TM,Function &F) {
   // Copying of byval aggregates + SROA may result in pointers being loaded as
   // integers, followed by intotoptr. We may want to mark those as global, too,
   // but only if the loaded integer is used exclusively for conversion to a
   // pointer with inttoptr.
-  auto HandleIntToPtr = [this](Value &V) {
+  auto HandleIntToPtr = [](Value &V) {
     if (llvm::all_of(V.users(), [](User *U) { return isa<IntToPtrInst>(U); })) {
       SmallVector<User *, 16> UsersToUpdate(V.users());
       for (User *U : UsersToUpdate)
@@ -721,8 +712,7 @@ bool NVPTXLowerArgs::runOnKernelFunction(const NVPTXTargetMachine &TM,
 }
 
 // Device functions only need to copy byval args into local memory.
-bool NVPTXLowerArgs::runOnDeviceFunction(const NVPTXTargetMachine &TM,
-                                         Function &F) {
+static bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F) {
   LLVM_DEBUG(dbgs() << "Lowering function args of " << F.getName() << "\n");
   for (Argument &Arg : F.args())
     if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
@@ -736,7 +726,6 @@ bool NVPTXLowerArgs::runOnFunction(Function &F) {
   return isKernelFunction(F) ? runOnKernelFunction(TM, F)
                              : runOnDeviceFunction(TM, F);
 }
-
 FunctionPass *llvm::createNVPTXLowerArgsPass() { return new NVPTXLowerArgs(); }
 
 static bool copyFunctionByValArgs(Function &F) {
@@ -757,3 +746,12 @@ PreservedAnalyses NVPTXCopyByValArgsPass::run(Function &F,
   return copyFunctionByValArgs(F) ? PreservedAnalyses::none()
                                   : PreservedAnalyses::all();
 }
+
+PreservedAnalyses NVPTXLowerArgsPass::run(Function &F,
+                                          FunctionAnalysisManager &AM) {
+  auto &NTM = static_cast<NVPTXTargetMachine &>(TM);
+  bool Changed = isKernelFunction(F) ? runOnKernelFunction(NTM, F)
+                                     : runOnDeviceFunction(NTM, F);
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+
+}
\ No newline at end of file

>From 2e4665f0aecf5aa2411eef27805a058b75d5d9f1 Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Wed, 26 Feb 2025 23:23:56 +0000
Subject: [PATCH 02/11] clang-format

---
 llvm/lib/Target/NVPTX/NVPTX.h            | 10 +++++-----
 llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp |  3 +--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 44be26b12f0c4..92d3effb45cda 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -76,12 +76,12 @@ struct NVPTXCopyByValArgsPass : PassInfoMixin<NVPTXCopyByValArgsPass> {
 };
 
 struct NVPTXLowerArgsPass : PassInfoMixin<NVPTXLowerArgsPass> {
-  private:
-    TargetMachine &TM;
+private:
+  TargetMachine &TM;
 
-  public:
-    NVPTXLowerArgsPass(TargetMachine& TM) : TM(TM) {};
-    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+public:
+  NVPTXLowerArgsPass(TargetMachine &TM) : TM(TM) {};
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
 namespace NVPTX {
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 0a65a0df8af3c..7e16af2b0b050 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -662,7 +662,7 @@ static void markPointerAsGlobal(Value *Ptr) {
 // =============================================================================
 // Main function for this pass.
 // =============================================================================
-static bool runOnKernelFunction(const NVPTXTargetMachine &TM,Function &F) {
+static bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F) {
   // Copying of byval aggregates + SROA may result in pointers being loaded as
   // integers, followed by intotoptr. We may want to mark those as global, too,
   // but only if the loaded integer is used exclusively for conversion to a
@@ -753,5 +753,4 @@ PreservedAnalyses NVPTXLowerArgsPass::run(Function &F,
   bool Changed = isKernelFunction(F) ? runOnKernelFunction(NTM, F)
                                      : runOnDeviceFunction(NTM, F);
   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
-
 }
\ No newline at end of file

>From c5c88bd5c084c4e35d230c0ffd498bbf7c411f4f Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Wed, 26 Feb 2025 15:43:38 -0800
Subject: [PATCH 03/11] add newline at end of file

---
 llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 7e16af2b0b050..02657bdb11504 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -753,4 +753,4 @@ PreservedAnalyses NVPTXLowerArgsPass::run(Function &F,
   bool Changed = isKernelFunction(F) ? runOnKernelFunction(NTM, F)
                                      : runOnDeviceFunction(NTM, F);
   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
-}
\ No newline at end of file
+}

>From dde7018bcd29ae6b28796c2e1ce29d3b918f5366 Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Thu, 27 Feb 2025 19:54:27 +0000
Subject: [PATCH 04/11] address review comments

---
 llvm/lib/Target/NVPTX/NVPTX.h                |  4 ++--
 llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp     |  2 +-
 llvm/lib/Target/NVPTX/NVPTXPassRegistry.def  |  1 +
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 12 ++++++++++++
 llvm/test/CodeGen/NVPTX/lower-byval-args.ll  |  2 ++
 5 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 92d3effb45cda..b2005ca7a83a4 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -77,10 +77,10 @@ struct NVPTXCopyByValArgsPass : PassInfoMixin<NVPTXCopyByValArgsPass> {
 
 struct NVPTXLowerArgsPass : PassInfoMixin<NVPTXLowerArgsPass> {
 private:
-  TargetMachine &TM;
+  const TargetMachine &TM;
 
 public:
-  NVPTXLowerArgsPass(TargetMachine &TM) : TM(TM) {};
+  NVPTXLowerArgsPass(const TargetMachine &TM) : TM(TM) {};
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 02657bdb11504..5960682937d39 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -749,7 +749,7 @@ PreservedAnalyses NVPTXCopyByValArgsPass::run(Function &F,
 
 PreservedAnalyses NVPTXLowerArgsPass::run(Function &F,
                                           FunctionAnalysisManager &AM) {
-  auto &NTM = static_cast<NVPTXTargetMachine &>(TM);
+  auto &NTM = static_cast<const NVPTXTargetMachine &>(TM);
   bool Changed = isKernelFunction(F) ? runOnKernelFunction(NTM, F)
                                      : runOnDeviceFunction(NTM, F);
   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
diff --git a/llvm/lib/Target/NVPTX/NVPTXPassRegistry.def b/llvm/lib/Target/NVPTX/NVPTXPassRegistry.def
index 28ea9dd9c0227..34c79b8f77bae 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPassRegistry.def
+++ b/llvm/lib/Target/NVPTX/NVPTXPassRegistry.def
@@ -38,4 +38,5 @@ FUNCTION_ALIAS_ANALYSIS("nvptx-aa", NVPTXAA())
 FUNCTION_PASS("nvvm-intr-range", NVVMIntrRangePass())
 FUNCTION_PASS("nvvm-reflect", NVVMReflectPass())
 FUNCTION_PASS("nvptx-copy-byval-args", NVPTXCopyByValArgsPass())
+FUNCTION_PASS("nvptx-lower-args", NVPTXLowerArgsPass(*this));
 #undef FUNCTION_PASS
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index f2afa6fc20bfa..53fbeacbc09e7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -21,6 +21,7 @@
 #include "NVPTXTargetObjectFile.h"
 #include "NVPTXTargetTransformInfo.h"
 #include "TargetInfo/NVPTXTargetInfo.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Analysis/KernelInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
@@ -271,6 +272,17 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
         PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
       });
 
+  PB.registerPipelineParsingCallback(
+    [this](StringRef PassName, FunctionPassManager &PM,
+           ArrayRef<PassBuilder::PipelineElement>) {
+      if (PassName == "nvptx-lower-args") {
+        PM.addPass(NVPTXLowerArgsPass(*this));
+        return true;
+      }
+      return false;
+    }
+  );
+
   if (!NoKernelInfoEndLTO) {
     PB.registerFullLinkTimeOptimizationLastEPCallback(
         [this](ModulePassManager &PM, OptimizationLevel Level) {
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index a09fb35d2d546..6805facf96866 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -2,6 +2,8 @@
 ; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_60 -mattr=ptx77 -nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_60
 ; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_70
 ; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -passes=nvptx-copy-byval-args -S | FileCheck %s --check-prefixes=COMMON,COPY
+; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_60 -mattr=ptx77 -passes=nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_60
+; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -passes=nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_70
 source_filename = "<stdin>"
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"

>From 4fd9a79c9cd0aee7330bb87da6d1da47a9509463 Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Thu, 27 Feb 2025 20:09:39 +0000
Subject: [PATCH 05/11] clang-format

---
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 53fbeacbc09e7..831cd18c3124d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -273,15 +273,14 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
       });
 
   PB.registerPipelineParsingCallback(
-    [this](StringRef PassName, FunctionPassManager &PM,
-           ArrayRef<PassBuilder::PipelineElement>) {
-      if (PassName == "nvptx-lower-args") {
-        PM.addPass(NVPTXLowerArgsPass(*this));
-        return true;
-      }
-      return false;
-    }
-  );
+      [this](StringRef PassName, FunctionPassManager &PM,
+             ArrayRef<PassBuilder::PipelineElement>) {
+        if (PassName == "nvptx-lower-args") {
+          PM.addPass(NVPTXLowerArgsPass(*this));
+          return true;
+        }
+        return false;
+      });
 
   if (!NoKernelInfoEndLTO) {
     PB.registerFullLinkTimeOptimizationLastEPCallback(

>From 8ff8bf22828d5d4957a9b75c165513c186a238f8 Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Thu, 27 Feb 2025 15:01:52 -0800
Subject: [PATCH 06/11] Update lower-byval-args.ll

---
 llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 6805facf96866..cfe934544eb3a 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 5
 ; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_60 -mattr=ptx77 -nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_60
 ; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_70
-; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -passes=nvptx-copy-byval-args -S | FileCheck %s --check-prefixes=COMMON,COPY
 ; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_60 -mattr=ptx77 -passes=nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_60
 ; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -passes=nvptx-lower-args -S | FileCheck %s --check-prefixes=COMMON,SM_70
+; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -passes=nvptx-copy-byval-args -S | FileCheck %s --check-prefixes=COMMON,COPY
 source_filename = "<stdin>"
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"

>From 163ed0fc9b90e39af33cb181685ac8956816b54b Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Thu, 27 Feb 2025 15:04:35 -0800
Subject: [PATCH 07/11] remove parsing callback

---
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 831cd18c3124d..e8fd342e716c6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -272,16 +272,6 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
         PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
       });
 
-  PB.registerPipelineParsingCallback(
-      [this](StringRef PassName, FunctionPassManager &PM,
-             ArrayRef<PassBuilder::PipelineElement>) {
-        if (PassName == "nvptx-lower-args") {
-          PM.addPass(NVPTXLowerArgsPass(*this));
-          return true;
-        }
-        return false;
-      });
-
   if (!NoKernelInfoEndLTO) {
     PB.registerFullLinkTimeOptimizationLastEPCallback(
         [this](ModulePassManager &PM, OptimizationLevel Level) {

>From 2fa9e6474828557e8b875e2e3f944023f81cbfc4 Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Thu, 27 Feb 2025 23:41:17 +0000
Subject: [PATCH 08/11] address review comments

---
 llvm/lib/Target/NVPTX/NVPTX.h            |  4 ++--
 llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 21 +++++++++++----------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index b2005ca7a83a4..92d3effb45cda 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -77,10 +77,10 @@ struct NVPTXCopyByValArgsPass : PassInfoMixin<NVPTXCopyByValArgsPass> {
 
 struct NVPTXLowerArgsPass : PassInfoMixin<NVPTXLowerArgsPass> {
 private:
-  const TargetMachine &TM;
+  TargetMachine &TM;
 
 public:
-  NVPTXLowerArgsPass(const TargetMachine &TM) : TM(TM) {};
+  NVPTXLowerArgsPass(TargetMachine &TM) : TM(TM) {};
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 5960682937d39..d598922c2d1fa 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -629,10 +629,9 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
     copyByValParam(*Func, *Arg);
 }
 
-// Knowing Ptr must point to the global address space, this function
-// addrspacecasts Ptr to global and then back to generic. This allows
-// NVPTXInferAddressSpaces to fold the global-to-generic cast into
-// loads/stores that appear later.
+// If `Ptr` is known to point to global memory, add addrspacecast to global
+// and back to generic. This will ensure that load/store operations on `Ptr`
+// are lowered to `{ld,st}.global`
 static void markPointerAsGlobal(Value *Ptr) {
   if (Ptr->getType()->getPointerAddressSpace() != ADDRESS_SPACE_GENERIC)
     return;
@@ -720,12 +719,15 @@ static bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F) {
   return true;
 }
 
-bool NVPTXLowerArgs::runOnFunction(Function &F) {
-  auto &TM = getAnalysis<TargetPassConfig>().getTM<NVPTXTargetMachine>();
-
+static bool processFunction(Function &F, NVPTXTargetMachine &TM) {
   return isKernelFunction(F) ? runOnKernelFunction(TM, F)
                              : runOnDeviceFunction(TM, F);
 }
+
+bool NVPTXLowerArgs::runOnFunction(Function &F) {
+  auto &TM = getAnalysis<TargetPassConfig>().getTM<NVPTXTargetMachine>();
+  return processFunction(F, TM);
+}
 FunctionPass *llvm::createNVPTXLowerArgsPass() { return new NVPTXLowerArgs(); }
 
 static bool copyFunctionByValArgs(Function &F) {
@@ -749,8 +751,7 @@ PreservedAnalyses NVPTXCopyByValArgsPass::run(Function &F,
 
 PreservedAnalyses NVPTXLowerArgsPass::run(Function &F,
                                           FunctionAnalysisManager &AM) {
-  auto &NTM = static_cast<const NVPTXTargetMachine &>(TM);
-  bool Changed = isKernelFunction(F) ? runOnKernelFunction(NTM, F)
-                                     : runOnDeviceFunction(NTM, F);
+  auto &NTM = static_cast<NVPTXTargetMachine &>(TM);
+  bool Changed = processFunction(F, NTM);
   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
 }

>From 40b097be1a3f0d7ef3248318f9f8dd52f7e3a732 Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Thu, 27 Feb 2025 23:59:15 +0000
Subject: [PATCH 09/11] rename pass

---
 llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp     | 16 ++++++++--------
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index d598922c2d1fa..097eb57c69a7d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -161,16 +161,16 @@
 using namespace llvm;
 
 namespace llvm {
-void initializeNVPTXLowerArgsPass(PassRegistry &);
+void initializeNVPTXLowerArgsLegacyPassPass(PassRegistry &);
 }
 
 namespace {
-class NVPTXLowerArgs : public FunctionPass {
+class NVPTXLowerArgsLegacyPass : public FunctionPass {
   bool runOnFunction(Function &F) override;
 
 public:
   static char ID; // Pass identification, replacement for typeid
-  NVPTXLowerArgs() : FunctionPass(ID) {}
+  NVPTXLowerArgsLegacyPass() : FunctionPass(ID) {}
   StringRef getPassName() const override {
     return "Lower pointer arguments of CUDA kernels";
   }
@@ -180,12 +180,12 @@ class NVPTXLowerArgs : public FunctionPass {
 };
 } // namespace
 
-char NVPTXLowerArgs::ID = 1;
+char NVPTXLowerArgsLegacyPass::ID = 1;
 
-INITIALIZE_PASS_BEGIN(NVPTXLowerArgs, "nvptx-lower-args",
+INITIALIZE_PASS_BEGIN(NVPTXLowerArgsLegacyPass, "nvptx-lower-args",
                       "Lower arguments (NVPTX)", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-INITIALIZE_PASS_END(NVPTXLowerArgs, "nvptx-lower-args",
+INITIALIZE_PASS_END(NVPTXLowerArgsLegacyPass, "nvptx-lower-args",
                     "Lower arguments (NVPTX)", false, false)
 
 // =============================================================================
@@ -724,11 +724,11 @@ static bool processFunction(Function &F, NVPTXTargetMachine &TM) {
                              : runOnDeviceFunction(TM, F);
 }
 
-bool NVPTXLowerArgs::runOnFunction(Function &F) {
+bool NVPTXLowerArgsLegacyPass::runOnFunction(Function &F) {
   auto &TM = getAnalysis<TargetPassConfig>().getTM<NVPTXTargetMachine>();
   return processFunction(F, TM);
 }
-FunctionPass *llvm::createNVPTXLowerArgsPass() { return new NVPTXLowerArgs(); }
+FunctionPass *llvm::createNVPTXLowerArgsPass() { return new NVPTXLowerArgsLegacyPass(); }
 
 static bool copyFunctionByValArgs(Function &F) {
   LLVM_DEBUG(dbgs() << "Creating a copy of byval args of " << F.getName()
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index e8fd342e716c6..2af50688e0c97 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -99,7 +99,7 @@ void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
 void initializeNVPTXLowerAllocaPass(PassRegistry &);
 void initializeNVPTXLowerUnreachablePass(PassRegistry &);
 void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
-void initializeNVPTXLowerArgsPass(PassRegistry &);
+void initializeNVPTXLowerArgsLegacyPassPass(PassRegistry &);
 void initializeNVPTXProxyRegErasurePass(PassRegistry &);
 void initializeNVVMIntrRangePass(PassRegistry &);
 void initializeNVVMReflectPass(PassRegistry &);
@@ -122,7 +122,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
   initializeNVPTXAllocaHoistingPass(PR);
   initializeNVPTXAssignValidGlobalNamesPass(PR);
   initializeNVPTXAtomicLowerPass(PR);
-  initializeNVPTXLowerArgsPass(PR);
+  initializeNVPTXLowerArgsLegacyPassPass(PR);
   initializeNVPTXLowerAllocaPass(PR);
   initializeNVPTXLowerUnreachablePass(PR);
   initializeNVPTXCtorDtorLoweringLegacyPass(PR);

>From 62bea3fdb4b6b0958a1b1ecac70dd9cd821106b3 Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Fri, 28 Feb 2025 00:09:12 +0000
Subject: [PATCH 10/11] clang-format

---
 llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 097eb57c69a7d..1a7216e0488d8 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -728,7 +728,9 @@ bool NVPTXLowerArgsLegacyPass::runOnFunction(Function &F) {
   auto &TM = getAnalysis<TargetPassConfig>().getTM<NVPTXTargetMachine>();
   return processFunction(F, TM);
 }
-FunctionPass *llvm::createNVPTXLowerArgsPass() { return new NVPTXLowerArgsLegacyPass(); }
+FunctionPass *llvm::createNVPTXLowerArgsPass() {
+  return new NVPTXLowerArgsLegacyPass();
+}
 
 static bool copyFunctionByValArgs(Function &F) {
   LLVM_DEBUG(dbgs() << "Creating a copy of byval args of " << F.getName()

>From e47285b0398887127902fd3033230fe53c578ec1 Mon Sep 17 00:00:00 2001
From: Akshay Deodhar <adeodhar at nvidia.com>
Date: Fri, 28 Feb 2025 13:09:28 -0800
Subject: [PATCH 11/11] remove unnecessary include

---
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 2af50688e0c97..ae93e4f326914 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -21,7 +21,6 @@
 #include "NVPTXTargetObjectFile.h"
 #include "NVPTXTargetTransformInfo.h"
 #include "TargetInfo/NVPTXTargetInfo.h"
-#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Analysis/KernelInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"



More information about the llvm-commits mailing list