[llvm] [AMDGPU] Move AMDGPUTargetMachine into AMDGPUCodeGenPassBuilder(NFC) (PR #103720)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 14 02:07:39 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Christudasan Devadasan (cdevadas)
<details>
<summary>Changes</summary>
This will allow us to reuse the existing flags and the static
functions while building the pipeline for the new pass manager.
---
Patch is 162.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/103720.diff
34 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp (+1734-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h (+142-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (+1-1)
- (removed) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (-1751)
- (removed) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (-162)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/CMakeLists.txt (-1)
- (modified) llvm/lib/Target/AMDGPU/R600TargetMachine.cpp (-1)
- (modified) llvm/lib/Target/AMDGPU/R600TargetMachine.h (+1-1)
- (modified) llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (-1)
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (+1-1)
- (modified) llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp (+1-1)
- (modified) llvm/unittests/Target/AMDGPU/DwarfRegMappings.cpp (+1-1)
- (modified) llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp (+1-1)
- (modified) llvm/unittests/Target/AMDGPU/PALMetadata.cpp (+1-1)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index f55f656ff922c1..adaecd4cee8383 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPUCodeGenPassBuilder.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/Module.h"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index f57fc168c1dfce..59d8c84430f9d3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -8,7 +8,7 @@
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPU.h"
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPUCodeGenPassBuilder.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 25e36dc4b3691f..e124cfb3fba72d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -14,8 +14,8 @@
#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUCodeGenPassBuilder.h"
#include "AMDGPULegalizerInfo.h"
-#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/Analysis.h"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
index fb3d3259171aca..0d7233432fc2b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
@@ -5,15 +5,1748 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains both AMDGPU target machine and the CodeGen pass builder.
+/// The AMDGPU target machine contains all of the hardware specific information
+/// needed to emit code for SI+ GPUs in the legacy pass manager pipeline. The
+/// CodeGen pass builder design is its equivalent for the new pass manager.
+//
+//===----------------------------------------------------------------------===//
#include "AMDGPUCodeGenPassBuilder.h"
#include "AMDGPU.h"
+#include "AMDGPUAliasAnalysis.h"
+#include "AMDGPUCtorDtorLowering.h"
+#include "AMDGPUExportClustering.h"
+#include "AMDGPUIGroupLP.h"
#include "AMDGPUISelDAGToDAG.h"
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPUMacroFusion.h"
+#include "AMDGPUPerfHintAnalysis.h"
+#include "AMDGPURegBankSelect.h"
+#include "AMDGPUSplitModule.h"
+#include "AMDGPUTargetObjectFile.h"
+#include "AMDGPUTargetTransformInfo.h"
+#include "AMDGPUUnifyDivergentExitNodes.h"
+#include "GCNIterativeScheduler.h"
+#include "GCNSchedStrategy.h"
+#include "GCNVOPDUtils.h"
+#include "R600.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600TargetMachine.h"
#include "SIFixSGPRCopies.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIMachineScheduler.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Transforms/HipStdPar/HipStdPar.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
+#include "llvm/Transforms/IPO/GlobalDCE.h"
+#include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
+#include <optional>
using namespace llvm;
+using namespace llvm::PatternMatch;
+
+namespace {
+class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
+public:
+ SGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
+ : RegisterRegAllocBase(N, D, C) {}
+};
+
+class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
+public:
+ VGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
+ : RegisterRegAllocBase(N, D, C) {}
+};
+
+static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI,
+ const Register Reg) {
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
+}
+
+static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI,
+ const Register Reg) {
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
+}
+
+/// -{sgpr|vgpr}-regalloc=... command line option.
+static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
+
+/// A dummy default pass factory indicates whether the register allocator is
+/// overridden on the command line.
+static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
+static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
+
+static SGPRRegisterRegAlloc
+ defaultSGPRRegAlloc("default",
+ "pick SGPR register allocator based on -O option",
+ useDefaultRegisterAllocator);
+
+static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<SGPRRegisterRegAlloc>>
+ SGPRRegAlloc("sgpr-regalloc", cl::Hidden,
+ cl::init(&useDefaultRegisterAllocator),
+ cl::desc("Register allocator to use for SGPRs"));
+
+static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<VGPRRegisterRegAlloc>>
+ VGPRRegAlloc("vgpr-regalloc", cl::Hidden,
+ cl::init(&useDefaultRegisterAllocator),
+ cl::desc("Register allocator to use for VGPRs"));
+
+static void initializeDefaultSGPRRegisterAllocatorOnce() {
+ RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = SGPRRegAlloc;
+ SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);
+ }
+}
+
+static void initializeDefaultVGPRRegisterAllocatorOnce() {
+ RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = VGPRRegAlloc;
+ VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);
+ }
+}
+
+static FunctionPass *createBasicSGPRRegisterAllocator() {
+ return createBasicRegisterAllocator(onlyAllocateSGPRs);
+}
+
+static FunctionPass *createGreedySGPRRegisterAllocator() {
+ return createGreedyRegisterAllocator(onlyAllocateSGPRs);
+}
+
+static FunctionPass *createFastSGPRRegisterAllocator() {
+ return createFastRegisterAllocator(onlyAllocateSGPRs, false);
+}
+
+static FunctionPass *createBasicVGPRRegisterAllocator() {
+ return createBasicRegisterAllocator(onlyAllocateVGPRs);
+}
+
+static FunctionPass *createGreedyVGPRRegisterAllocator() {
+ return createGreedyRegisterAllocator(onlyAllocateVGPRs);
+}
+
+static FunctionPass *createFastVGPRRegisterAllocator() {
+ return createFastRegisterAllocator(onlyAllocateVGPRs, true);
+}
+
+static SGPRRegisterRegAlloc basicRegAllocSGPR("basic",
+ "basic register allocator",
+ createBasicSGPRRegisterAllocator);
+static SGPRRegisterRegAlloc
+ greedyRegAllocSGPR("greedy", "greedy register allocator",
+ createGreedySGPRRegisterAllocator);
+
+static SGPRRegisterRegAlloc fastRegAllocSGPR("fast", "fast register allocator",
+ createFastSGPRRegisterAllocator);
+
+static VGPRRegisterRegAlloc basicRegAllocVGPR("basic",
+ "basic register allocator",
+ createBasicVGPRRegisterAllocator);
+static VGPRRegisterRegAlloc
+ greedyRegAllocVGPR("greedy", "greedy register allocator",
+ createGreedyVGPRRegisterAllocator);
+
+static VGPRRegisterRegAlloc fastRegAllocVGPR("fast", "fast register allocator",
+ createFastVGPRRegisterAllocator);
+} // anonymous namespace
+
+static cl::opt<bool>
+ EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
+ cl::desc("Run early if-conversion"),
+ cl::init(false));
+
+static cl::opt<bool>
+ OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
+ cl::desc("Run pre-RA exec mask optimizations"),
+ cl::init(true));
+
+static cl::opt<bool>
+ LowerCtorDtor("amdgpu-lower-global-ctor-dtor",
+ cl::desc("Lower GPU ctor / dtors to globals on the device."),
+ cl::init(true), cl::Hidden);
+
+// Option to disable vectorizer for tests.
+static cl::opt<bool>
+ EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer",
+ cl::desc("Enable load store vectorizer"),
+ cl::init(true), cl::Hidden);
+
+// Option to control global loads scalarization
+static cl::opt<bool>
+ ScalarizeGlobal("amdgpu-scalarize-global-loads",
+ cl::desc("Enable global load scalarization"),
+ cl::init(true), cl::Hidden);
+
+// Option to run internalize pass.
+static cl::opt<bool> InternalizeSymbols(
+ "amdgpu-internalize-symbols",
+ cl::desc("Enable elimination of non-kernel functions and unused globals"),
+ cl::init(false), cl::Hidden);
+
+// Option to inline all early.
+static cl::opt<bool> EarlyInlineAll("amdgpu-early-inline-all",
+ cl::desc("Inline all functions early"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> RemoveIncompatibleFunctions(
+ "amdgpu-enable-remove-incompatible-functions", cl::Hidden,
+ cl::desc("Enable removal of functions when they"
+ "use features not supported by the target GPU"),
+ cl::init(true));
+
+static cl::opt<bool> EnableSDWAPeephole("amdgpu-sdwa-peephole",
+ cl::desc("Enable SDWA peepholer"),
+ cl::init(true));
+
+static cl::opt<bool> EnableDPPCombine("amdgpu-dpp-combine",
+ cl::desc("Enable DPP combiner"),
+ cl::init(true));
+
+// Enable address space based alias analysis
+static cl::opt<bool>
+ EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
+ cl::desc("Enable AMDGPU Alias Analysis"),
+ cl::init(true));
+
+// Option to run late CFG structurizer
+static cl::opt<bool, true> LateCFGStructurize(
+ "amdgpu-late-structurize", cl::desc("Enable late CFG structurization"),
+ cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden);
+
+// Disable structurizer-based control-flow lowering in order to test convergence
+// control tokens. This should eventually be replaced by the wave-transform.
+static cl::opt<bool, true> DisableStructurizer(
+ "amdgpu-disable-structurizer",
+ cl::desc("Disable structurizer for experiments; produces unusable code"),
+ cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden);
+
+// Enable lib calls simplifications
+static cl::opt<bool>
+ EnableLibCallSimplify("amdgpu-simplify-libcall",
+ cl::desc("Enable amdgpu library simplifications"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool> EnableLowerKernelArguments(
+ "amdgpu-ir-lower-kernel-arguments",
+ cl::desc("Lower kernel argument loads in IR pass"), cl::init(true),
+ cl::Hidden);
+
+static cl::opt<bool> EnableRegReassign(
+ "amdgpu-reassign-regs",
+ cl::desc("Enable register reassign optimizations on gfx10+"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool> OptVGPRLiveRange(
+ "amdgpu-opt-vgpr-liverange",
+ cl::desc("Enable VGPR liverange optimizations for if-else structure"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
+ "amdgpu-atomic-optimizer-strategy",
+ cl::desc("Select DPP or Iterative strategy for scan"),
+ cl::init(ScanOptions::Iterative),
+ cl::values(
+ clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"),
+ clEnumValN(ScanOptions::Iterative, "Iterative",
+ "Use Iterative approach for scan"),
+ clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")));
+
+// Enable Mode register optimization
+static cl::opt<bool>
+ EnableSIModeRegisterPass("amdgpu-mode-register",
+ cl::desc("Enable mode register pass"),
+ cl::init(true), cl::Hidden);
+
+// Enable GFX11.5+ s_singleuse_vdst insertion
+static cl::opt<bool>
+ EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst",
+ cl::desc("Enable s_singleuse_vdst insertion"),
+ cl::init(false), cl::Hidden);
+
+// Enable GFX11+ s_delay_alu insertion
+static cl::opt<bool>
+ EnableInsertDelayAlu("amdgpu-enable-delay-alu",
+ cl::desc("Enable s_delay_alu insertion"),
+ cl::init(true), cl::Hidden);
+
+// Enable GFX11+ VOPD
+static cl::opt<bool>
+ EnableVOPD("amdgpu-enable-vopd",
+ cl::desc("Enable VOPD, dual issue of VALU in wave32"),
+ cl::init(true), cl::Hidden);
+
+// Option is used in lit tests to prevent deadcoding of patterns inspected.
+static cl::opt<bool>
+ EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden,
+ cl::desc("Enable machine DCE inside regalloc"));
+
+static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
+ cl::desc("Adjust wave priority"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EnableScalarIRPasses("amdgpu-scalar-ir-passes",
+ cl::desc("Enable scalar IR passes"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool> EnableStructurizerWorkarounds(
+ "amdgpu-enable-structurizer-workarounds",
+ cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
+ cl::Hidden);
+
+static cl::opt<bool, true> EnableLowerModuleLDS(
+ "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
+ cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
+ cl::Hidden);
+
+static cl::opt<bool>
+ EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations",
+ cl::desc("Enable Pre-RA optimizations pass"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool> EnablePromoteKernelArguments(
+ "amdgpu-enable-promote-kernel-arguments",
+ cl::desc("Enable promotion of flat kernel pointer arguments to global"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> EnableImageIntrinsicOptimizer(
+ "amdgpu-enable-image-intrinsic-optimizer",
+ cl::desc("Enable image intrinsic optimizer pass"), cl::init(true),
+ cl::Hidden);
+
+static cl::opt<bool>
+ EnableLoopPrefetch("amdgpu-loop-prefetch",
+ cl::desc("Enable loop data prefetch on AMDGPU"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> EnableMaxIlpSchedStrategy(
+ "amdgpu-enable-max-ilp-scheduling-strategy",
+ cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> EnableRewritePartialRegUses(
+ "amdgpu-enable-rewrite-partial-reg-uses",
+ cl::desc("Enable rewrite partial reg uses pass"), cl::init(true),
+ cl::Hidden);
+
+static cl::opt<bool>
+ EnableHipStdPar("amdgpu-enable-hipstdpar",
+ cl::desc("Enable HIP Standard Parallelism Offload support"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+ EnableAMDGPUAttributor("amdgpu-attributor-enable",
+ cl::desc("Enable AMDGPUAttributorPass"),
+ cl::init(true), cl::Hidden);
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
+ // Register the target
+ RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
+ RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
+
+ PassRegistry *PR = PassRegistry::getPassRegistry();
+ initializeR600ClauseMergePassPass(*PR);
+ initializeR600ControlFlowFinalizerPass(*PR);
+ initializeR600PacketizerPass(*PR);
+ initializeR600ExpandSpecialInstrsPassPass(*PR);
+ initializeR600VectorRegMergerPass(*PR);
+ initializeGlobalISel(*PR);
+ initializeAMDGPUDAGToDAGISelLegacyPass(*PR);
+ initializeGCNDPPCombinePass(*PR);
+ initializeSILowerI1CopiesLegacyPass(*PR);
+ initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
+ initializeSILowerWWMCopiesPass(*PR);
+ initializeAMDGPUMarkLastScratchLoadPass(*PR);
+ initializeSILowerSGPRSpillsPass(*PR);
+ initializeSIFixSGPRCopiesLegacyPass(*PR);
+ initializeSIFixVGPRCopiesPass(*PR);
+ initializeSIFoldOperandsPass(*PR);
+ initializeSIPeepholeSDWAPass(*PR);
+ initializeSIShrinkInstructionsPass(*PR);
+ initializeSIOptimizeExecMaskingPreRAPass(*PR);
+ initializeSIOptimizeVGPRLiveRangePass(*PR);
+ initializeSILoadStoreOptimizerPass(*PR);
+ initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
+ initializeAMDGPUAlwaysInlinePass(*PR);
+ initializeAMDGPUAttributorLegacyPass(*PR);
+ initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
+ initializeAMDGPUAnnotateUniformValuesLegacyPass(*PR);
+ initializeAMDGPUArgumentUsageInfoPass(*PR);
+ initializeAMDGPUAtomicOptimizerPass(*PR);
+ initializeAMDGPULowerKernelArgumentsPass(*PR);
+ initializeAMDGPUPromoteKernelArgumentsPass(*PR);
+ initializeAMDGPULowerKernelAttributesPass(*PR);
+ initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
+ initializeAMDGPUPostLegalizerCombinerPass(*PR);
+ initializeAMDGPUPreLegalizerCombinerPass(*PR);
+ initializeAMDGPURegBankCombinerPass(*PR);
+ initializeAMDGPURegBankSelectPass(*PR);
+ initializeAMDGPUPromoteAllocaPass(*PR);
+ initializeAMDGPUPromoteAllocaToVectorPass(*PR);
+ initializeAMDGPUCodeGenPreparePass(*PR);
+ initializeAMDGPULateCodeGenPrepareLegacyPass(*PR);
+ initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
+ initializeAMDGPULowerModuleLDSLegacyPass(*PR);
+ initializeAMDGPULowerBufferFatPointersPass(*PR);
+ initializeAMDGPURewriteOutArgumentsPass(*PR);
+ initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
+ initializeAMDGPUUnifyMetadataPass(*PR);
+ initializeSIAnnotateControlFlowLegacyPass(*PR);
+ initializeAMDGPUInsertSingleUseVDSTPass(*PR);
+ initializeAMDGPUInsertDelayAluPass(*PR);
+ initializeSIInsertHardClausesPass(*PR);
+ initializeSIInsertWaitcntsPass(*PR);
+ initializeSIModeRegisterPass(*PR);
+ initializeSIWholeQuadModePass(*PR);
+ initializeSILowerControlFlowPass(*PR);
+ initializeSIPreEmitPeepholePass(*PR);
+ initializeSILateBranchLoweringPass(*PR);
+ in...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/103720
More information about the llvm-commits
mailing list