[llvm] f786916 - [CodeGen][NPM] Support CodeGenSCCOrder in pipeline (#136818)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 8 23:02:24 PDT 2025


Author: Akshat Oke
Date: 2025-07-09T11:32:21+05:30
New Revision: f78691630ae38a2663e9349a181ad7765c7be769

URL: https://github.com/llvm/llvm-project/commit/f78691630ae38a2663e9349a181ad7765c7be769
DIFF: https://github.com/llvm/llvm-project/commit/f78691630ae38a2663e9349a181ad7765c7be769.diff

LOG: [CodeGen][NPM] Support CodeGenSCCOrder in pipeline (#136818)

Wrap passes into Post order CGSCC pass manager in codegen pass builder.

I am adding the pipeline test in this but it is not yet complete.

Added: 
    llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll

Modified: 
    llvm/include/llvm/Passes/CodeGenPassBuilder.h
    llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 6ed9ac47405d3..a3b19af4adc39 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -210,10 +211,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
   class AddIRPass {
   public:
     AddIRPass(ModulePassManager &MPM, const DerivedT &PB) : MPM(MPM), PB(PB) {}
-    ~AddIRPass() {
-      if (!FPM.isEmpty())
-        MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-    }
+    ~AddIRPass() { flushFPMToMPM(); }
 
     template <typename PassT>
     void operator()(PassT &&Pass, StringRef Name = PassT::name()) {
@@ -231,16 +229,40 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
         FPM.addPass(std::forward<PassT>(Pass));
       } else {
         // Add Module Pass
-        if (!FPM.isEmpty()) {
-          MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-          FPM = FunctionPassManager();
-        }
-
+        flushFPMToMPM();
         MPM.addPass(std::forward<PassT>(Pass));
       }
     }
 
+    /// Setting this will add passes to the CGSCC pass manager.
+    void requireCGSCCOrder() {
+      if (PB.AddInCGSCCOrder)
+        return;
+      flushFPMToMPM();
+      PB.AddInCGSCCOrder = true;
+    }
+
+    /// Stop adding passes to the CGSCC pass manager.
+    /// Existing passes won't be removed.
+    void stopAddingInCGSCCOrder() {
+      if (!PB.AddInCGSCCOrder)
+        return;
+      flushFPMToMPM();
+      PB.AddInCGSCCOrder = false;
+    }
+
   private:
+    void flushFPMToMPM() {
+      if (FPM.isEmpty())
+        return;
+      if (PB.AddInCGSCCOrder) {
+        MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+            createCGSCCToFunctionPassAdaptor(std::move(FPM))));
+      } else {
+        MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+      }
+      FPM = FunctionPassManager();
+    }
     ModulePassManager &MPM;
     FunctionPassManager FPM;
     const DerivedT &PB;
@@ -252,13 +274,17 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
     AddMachinePass(ModulePassManager &MPM, const DerivedT &PB)
         : MPM(MPM), PB(PB) {}
     ~AddMachinePass() {
-      if (!MFPM.isEmpty()) {
-        FunctionPassManager FPM;
-        FPM.addPass(
-            createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
-        FPM.addPass(InvalidateAnalysisPass<MachineFunctionAnalysis>());
+      if (MFPM.isEmpty())
+        return;
+
+      FunctionPassManager FPM;
+      FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
+      FPM.addPass(InvalidateAnalysisPass<MachineFunctionAnalysis>());
+      if (this->PB.AddInCGSCCOrder) {
+        MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+            createCGSCCToFunctionPassAdaptor(std::move(FPM))));
+      } else
         MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-      }
     }
 
     template <typename PassT>
@@ -276,12 +302,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
         MFPM.addPass(std::forward<PassT>(Pass));
       } else {
         // Add Module Pass
-        if (!MFPM.isEmpty()) {
-          MPM.addPass(createModuleToFunctionPassAdaptor(
-              createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))));
-          MFPM = MachineFunctionPassManager();
-        }
-
+        flushMFPMToMPM();
         MPM.addPass(std::forward<PassT>(Pass));
       }
 
@@ -289,7 +310,39 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
         C(Name, MFPM);
     }
 
+    /// Setting this will add passes to the CGSCC pass manager.
+    void requireCGSCCOrder() {
+      if (PB.AddInCGSCCOrder)
+        return;
+      flushMFPMToMPM();
+      PB.AddInCGSCCOrder = true;
+    }
+
+    /// Stop adding passes to the CGSCC pass manager.
+    /// Existing passes won't be removed.
+    void stopAddingInCGSCCOrder() {
+      if (!PB.AddInCGSCCOrder)
+        return;
+      flushMFPMToMPM();
+      PB.AddInCGSCCOrder = false;
+    }
+
   private:
+    void flushMFPMToMPM() {
+      if (MFPM.isEmpty())
+        return;
+
+      if (PB.AddInCGSCCOrder) {
+        MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+            createCGSCCToFunctionPassAdaptor(
+                createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)))));
+      } else {
+        MPM.addPass(createModuleToFunctionPassAdaptor(
+            createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))));
+      }
+      MFPM = MachineFunctionPassManager();
+    }
+
     ModulePassManager &MPM;
     MachineFunctionPassManager MFPM;
     const DerivedT &PB;
@@ -555,6 +608,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
   /// Helper variable for `-start-before/-start-after/-stop-before/-stop-after`
   mutable bool Started = true;
   mutable bool Stopped = true;
+  mutable bool AddInCGSCCOrder = false;
 };
 
 template <typename Derived, typename TargetMachineT>
@@ -813,6 +867,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addISelPrepare(
     AddIRPass &addPass) const {
   derived().addPreISel(addPass);
 
+  if (Opt.RequiresCodeGenSCCOrder)
+    addPass.requireCGSCCOrder();
+
   addPass(CallBrPreparePass());
   // Add both the safe stack and the stack protection passes: each of them will
   // only protect functions that have corresponding attributes.

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c3536113e9bef..ef8ff53136b73 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -2099,6 +2099,8 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
   // being run on them, which causes crashes in the resource usage analysis).
   addPass(AMDGPULowerBufferFatPointersPass(TM));
 
+  addPass.requireCGSCCOrder();
+
   Base::addCodeGenPrepare(addPass);
 
   if (isPassEnabled(EnableLoadStoreVectorizer))

diff  --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
new file mode 100644
index 0000000000000..5155ec212c12f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -0,0 +1,16 @@
+; RUN: llc -enable-new-pm -mtriple=amdgcn--amdhsa -O0 -print-pipeline-passes < %s 2>&1 \
+; RUN:   | FileCheck -check-prefix=GCN-O0 %s
+
+; RUN: llc -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \
+; RUN:   | FileCheck -check-prefix=GCN-O2 %s
+
+; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \
+; RUN:   | FileCheck -check-prefix=GCN-O3 %s
+
+; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
+; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,require<machine-loops>,phi-node-elimination,two-address-instruction,register-coalescer,rename-independent-subregs,machine-scheduler,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
+; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,require<machine-loops>,phi-node-elimination,two-address-instruction,register-coalescer,rename-independent-subregs,machine-scheduler,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>))
+
+define void @empty() {
+  ret void
+}


        


More information about the llvm-commits mailing list