[llvm-branch-commits] [llvm] AMDGPU/NewPM: Fill out addPreISelPasses (PR #102814)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Aug 11 07:32:58 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-transforms
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
This specific callback should now be at parity with the old
pass manager version. There are still some missing IR passes
before this point.
Also, I don't understand the need for the RequireAnalysisPass at the
end. SelectionDAG should just be using the uncached getResult?
---
Full diff: https://github.com/llvm/llvm-project/pull/102814.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp (+53-2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+5-3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+1)
- (modified) llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
index fb3d3259171ac..36f44a20d9553 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
@@ -9,9 +9,17 @@
#include "AMDGPUCodeGenPassBuilder.h"
#include "AMDGPU.h"
#include "AMDGPUISelDAGToDAG.h"
+#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUTargetMachine.h"
+#include "AMDGPUUnifyDivergentExitNodes.h"
#include "SIFixSGPRCopies.h"
#include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/Transforms/Scalar/FlattenCFG.h"
+#include "llvm/Transforms/Scalar/Sink.h"
+#include "llvm/Transforms/Scalar/StructurizeCFG.h"
+#include "llvm/Transforms/Utils/FixIrreducible.h"
+#include "llvm/Transforms/Utils/LCSSA.h"
+#include "llvm/Transforms/Utils/UnifyLoopExits.h"
using namespace llvm;
@@ -28,8 +36,51 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
}
void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const {
- // TODO: Add passes pre instruction selection.
- // Test only, convert to real IR passes in future.
+ const bool LateCFGStructurize = AMDGPUTargetMachine::EnableLateStructurizeCFG;
+ const bool DisableStructurizer = AMDGPUTargetMachine::DisableStructurizer;
+ const bool EnableStructurizerWorkarounds =
+ AMDGPUTargetMachine::EnableStructurizerWorkarounds;
+
+ if (TM.getOptLevel() > CodeGenOptLevel::None)
+ addPass(FlattenCFGPass());
+
+ if (TM.getOptLevel() > CodeGenOptLevel::None)
+ addPass(SinkingPass());
+
+ addPass(AMDGPULateCodeGenPreparePass(TM));
+
+ // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
+ // regions formed by them.
+
+ addPass(AMDGPUUnifyDivergentExitNodesPass());
+
+ if (!LateCFGStructurize && !DisableStructurizer) {
+ if (EnableStructurizerWorkarounds) {
+ addPass(FixIrreduciblePass());
+ addPass(UnifyLoopExitsPass());
+ }
+
+ addPass(StructurizeCFGPass(/*SkipUniformRegions=*/false));
+ }
+
+ addPass(AMDGPUAnnotateUniformValuesPass());
+
+ if (!LateCFGStructurize && !DisableStructurizer) {
+ addPass(SIAnnotateControlFlowPass(TM));
+
+ // TODO: Move this right after structurizeCFG to avoid extra divergence
+ // analysis. This depends on stopping SIAnnotateControlFlow from making
+ // control flow modifications.
+ addPass(AMDGPURewriteUndefForPHIPass());
+ }
+
+ addPass(LCSSAPass());
+
+ if (TM.getOptLevel() > CodeGenOptLevel::Less)
+ addPass(AMDGPUPerfHintAnalysisPass(TM));
+
+ // FIXME: Why isn't this queried as required from AMDGPUISelDAGToDAG, and why
+ // isn't this in addInstSelector?
addPass(RequireAnalysisPass<UniformityInfoAnalysis, Function>());
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 0523fee5bcf9f..5929dadf93bcb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -338,10 +338,11 @@ static cl::opt<bool> EnableScalarIRPasses(
cl::init(true),
cl::Hidden);
-static cl::opt<bool> EnableStructurizerWorkarounds(
+static cl::opt<bool, true> EnableStructurizerWorkarounds(
"amdgpu-enable-structurizer-workarounds",
- cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
- cl::Hidden);
+ cl::desc("Enable workarounds for the StructurizeCFG pass"),
+ cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds),
+ cl::init(true), cl::Hidden);
static cl::opt<bool, true> EnableLowerModuleLDS(
"amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
@@ -611,6 +612,7 @@ bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
bool AMDGPUTargetMachine::DisableStructurizer = false;
+bool AMDGPUTargetMachine::EnableStructurizerWorkarounds = true;
AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 6bb8788cc73b0..4d39ad2b41505 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -38,6 +38,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
static bool EnableFunctionCalls;
static bool EnableLowerModuleLDS;
static bool DisableStructurizer;
+ static bool EnableStructurizerWorkarounds;
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
diff --git a/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll b/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll
index 2c7072b8c93b1..2acd2355965a5 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-isel -stop-after=amdgpu-isel -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
-; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=amdgpu-isel -enable-new-pm | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-isel -stop-after=amdgpu-isel -enable-new-pm -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
; This caused failure in infinite cycle in Selection DAG (combine) due to missing insert_subvector.
;
``````````
</details>
https://github.com/llvm/llvm-project/pull/102814
More information about the llvm-branch-commits
mailing list