[llvm-branch-commits] [llvm] AMDGPU/NewPM: Start filling out addIRPasses (PR #102884)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Aug 12 04:40:33 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Matt Arsenault (arsenm)
This is not complete, but gets AtomicExpand running. I was able
to get further than I expected; we're quite close to having all
the IR codegen passes ported.
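
For context, "porting" an IR codegen pass to the new pass manager means giving it the standard new-PM shape so that a `CodeGenPassBuilder` subclass can schedule it through `addPass`. A minimal generic sketch of that shape is below; it is not code from this patch, and `MyIRCodeGenPass` is a made-up name used purely for illustration:

```cpp
#include "llvm/IR/PassManager.h"

// Generic new-pass-manager pass shape: a lightweight class deriving from
// PassInfoMixin with a run() method that reports which analyses survived.
// Pass objects of this form are what AddIRPass/addPass accept directly.
class MyIRCodeGenPass : public llvm::PassInfoMixin<MyIRCodeGenPass> {
public:
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &FAM) {
    // ... transform F ...
    return llvm::PreservedAnalyses::none();
  }
};
```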
---
Full diff: https://github.com/llvm/llvm-project/pull/102884.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp (+104)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h (+5)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+1)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
index 9fd7e24b114dd..854e1644a71e9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
@@ -8,14 +8,24 @@
#include "AMDGPUCodeGenPassBuilder.h"
#include "AMDGPU.h"
+#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "SIFixSGPRCopies.h"
#include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
+#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
+#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
+#include "llvm/Transforms/Scalar/NaryReassociate.h"
+#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/Sink.h"
+#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/LCSSA.h"
@@ -38,6 +48,70 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
ShadowStackGCLoweringPass>();
}
+void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
+ // TODO: Missing AMDGPURemoveIncompatibleFunctions
+
+ addPass(AMDGPUPrintfRuntimeBindingPass());
+ if (LowerCtorDtor)
+ addPass(AMDGPUCtorDtorLoweringPass());
+
+ if (isPassEnabled(EnableImageIntrinsicOptimizer))
+ addPass(AMDGPUImageIntrinsicOptimizerPass(TM));
+
+ // This can be disabled by passing ::Disable here or on the command line
+ // with --expand-variadics-override=disable.
+ addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering));
+
+ addPass(AMDGPUAlwaysInlinePass());
+ addPass(AlwaysInlinerPass());
+
+ // TODO: Missing OpenCLEnqueuedBlockLowering
+
+ // Runs before PromoteAlloca so the latter can account for function uses
+ if (EnableLowerModuleLDS)
+ addPass(AMDGPULowerModuleLDSPass(TM));
+
+ if (TM.getOptLevel() > CodeGenOptLevel::None)
+ addPass(InferAddressSpacesPass());
+
+ // Run atomic optimizer before Atomic Expand
+ if (TM.getOptLevel() >= CodeGenOptLevel::Less &&
+ (AMDGPUAtomicOptimizerStrategy != ScanOptions::None))
+ addPass(AMDGPUAtomicOptimizerPass(TM, AMDGPUAtomicOptimizerStrategy));
+
+ addPass(AtomicExpandPass());
+
+ if (TM.getOptLevel() > CodeGenOptLevel::None) {
+ addPass(AMDGPUPromoteAllocaPass(TM));
+ if (isPassEnabled(EnableScalarIRPasses))
+ addStraightLineScalarOptimizationPasses(addPass);
+
+ // TODO: Handle EnableAMDGPUAliasAnalysis
+
+ // TODO: May want to move later or split into an early and late one.
+ addPass(AMDGPUCodeGenPreparePass(TM));
+
+ // TODO: LICM
+ }
+
+ Base::addIRPasses(addPass);
+
+ // EarlyCSE is not always strong enough to clean up what LSR produces. For
+ // example, GVN can combine
+ //
+ // %0 = add %a, %b
+ // %1 = add %b, %a
+ //
+ // and
+ //
+ // %0 = shl nsw %a, 2
+ // %1 = shl %a, 2
+ //
+ // but EarlyCSE can do neither of them.
+ if (isPassEnabled(EnableScalarIRPasses))
+ addEarlyCSEOrGVNPass(addPass);
+}
+
void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
// AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
// deleted soon.
@@ -136,6 +210,36 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
return Error::success();
}
+void AMDGPUCodeGenPassBuilder::addEarlyCSEOrGVNPass(AddIRPass &addPass) const {
+ if (TM.getOptLevel() == CodeGenOptLevel::Aggressive)
+ addPass(GVNPass());
+ else
+ addPass(EarlyCSEPass());
+}
+
+void AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses(
+ AddIRPass &addPass) const {
+ if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive))
+ addPass(LoopDataPrefetchPass());
+
+ addPass(SeparateConstOffsetFromGEPPass());
+
+ // ReassociateGEPs exposes more opportunities for SLSR. See
+ // the example in reassociate-geps-and-slsr.ll.
+ addPass(StraightLineStrengthReducePass());
+
+  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
+ // EarlyCSE can reuse.
+ addEarlyCSEOrGVNPass(addPass);
+
+ // Run NaryReassociate after EarlyCSE/GVN to be more effective.
+ addPass(NaryReassociatePass());
+
+ // NaryReassociate on GEPs creates redundant common expressions, so run
+ // EarlyCSE after it.
+ addPass(EarlyCSEPass());
+}
+
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
CodeGenOptLevel Level) const {
if (Opt.getNumOccurrences())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
index 1ff7744c84a43..c71566316993d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
@@ -24,11 +24,16 @@ class AMDGPUCodeGenPassBuilder
AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM,
const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC);
+
+ void addIRPasses(AddIRPass &) const;
void addCodeGenPrepare(AddIRPass &) const;
void addPreISel(AddIRPass &addPass) const;
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
Error addInstSelector(AddMachinePass &) const;
+ void addEarlyCSEOrGVNPass(AddIRPass &) const;
+ void addStraightLineScalarOptimizationPasses(AddIRPass &) const;
+
/// Check if a pass is enabled given \p Opt option. The option always
/// overrides defaults if explicitly used. Otherwise its default will
/// be used given that a pass shall work at an optimization \p Level
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 3409a49fe203f..b7c91ae93de84 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -66,6 +66,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
+#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
``````````
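
For context, the `isPassEnabled` helper that gates several of the passes added in `addIRPasses` is only shown truncated at the hunk boundary above. Based on the header comment and the legacy helper of the same name in `AMDGPUTargetMachine.cpp`, its gating logic is roughly the following sketch (not part of this patch):

```cpp
// Sketch of the option-gating rule documented in AMDGPUCodeGenPassBuilder.h:
// an explicit command-line occurrence of the option always wins; otherwise
// the option's default only applies once the pipeline's optimization level
// reaches the minimum level the pass requires.
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
                                             CodeGenOptLevel Level) const {
  if (Opt.getNumOccurrences())
    return Opt;                 // explicitly enabled or disabled on the CLI
  if (TM.getOptLevel() < Level)
    return false;               // below the minimum level: keep the pass off
  return Opt;                   // otherwise fall back to the option default
}
```

Under that rule, a call such as `isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive)` only schedules `LoopDataPrefetchPass` at -O3 unless the flag is passed explicitly on the command line.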
https://github.com/llvm/llvm-project/pull/102884