[llvm] AMDGPU/NewPM: Fill out passes in addCodeGenPrepare (PR #102867)

Tue Aug 20 12:28:30 PDT 2024

https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102867

>From f3a5545accd71b2f478132207f6dc720692533f9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 12 Aug 2024 13:09:55 +0400
Subject: [PATCH] AMDGPU/NewPM: Fill out passes in addCodeGenPrepare

AMDGPUAnnotateKernelFeatures hasn't been ported yet, but it
should be soon removable.
---
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 36 +++++++++++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h  |  6 ++++
 2 files changed, 42 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 04b96a32a8970a..b27746f9d6a557 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1770,8 +1770,35 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
 }
 
 void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
+  // AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
+  // deleted soon.
+
+  if (EnableLowerKernelArguments)
+    addPass(AMDGPULowerKernelArgumentsPass(TM));
+
+  // This lowering has been placed after codegenprepare to take advantage of
+  // address mode matching (which is why it isn't put with the LDS lowerings).
+  // It could be placed anywhere before uniformity annotations (an analysis
+  // that it changes by splitting up fat pointers into their components)
+  // but has been put before switch lowering and CFG flattening so that those
+  // passes can run on the more optimized control flow this pass creates in
+  // many cases.
+  //
+  // FIXME: This should ideally be put after the LoadStoreVectorizer.
+  // However, due to some annoying facts about ResourceUsageAnalysis,
+  // (especially as exercised in the resource-usage-dead-function test),
+  // we need all the function passes codegenprepare all the way through
+  // said resource usage analysis to run on the call graph produced
+  // before codegenprepare runs (because codegenprepare will knock some
+  // nodes out of the graph, which leads to function-level passes not
+  // being run on them, which causes crashes in the resource usage analysis).
+  addPass(AMDGPULowerBufferFatPointersPass(TM));
+
   Base::addCodeGenPrepare(addPass);
 
+  if (isPassEnabled(EnableLoadStoreVectorizer))
+    addPass(LoadStoreVectorizerPass());
+
   // LowerSwitch pass may introduce unreachable blocks that can cause unexpected
   // behavior for subsequent passes. Placing it here seems better that these
   // blocks would get cleaned up by UnreachableBlockElim inserted next in the
@@ -1839,3 +1866,12 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
   addPass(SILowerI1CopiesPass());
   return Error::success();
 }
+
+bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
+                                             CodeGenOptLevel Level) const {
+  if (Opt.getNumOccurrences())
+    return Opt;
+  if (TM.getOptLevel() < Level)
+    return false;
+  return Opt;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 576bf40e3328d5..9ee406154b9b76 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -176,6 +176,12 @@ class AMDGPUCodeGenPassBuilder
   void addPreISel(AddIRPass &addPass) const;
   void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
   Error addInstSelector(AddMachinePass &) const;
+
+  /// Check if a pass is enabled given \p Opt option. The option always
+  /// overrides defaults if explicitly used. Otherwise its default will be used
+  /// given that a pass shall work at an optimization \p Level minimum.
+  bool isPassEnabled(const cl::opt<bool> &Opt,
+                     CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
 };
 
 } // end namespace llvm