[llvm-branch-commits] [llvm] AMDGPU: Declare pass control flags in header (PR #102865)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Aug 12 04:17:15 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102865
From 64679cbc78a5bba63bf0b5eb5427ffae7aae6b22 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 12 Aug 2024 12:46:00 +0400
Subject: [PATCH] AMDGPU: Declare pass control flags in header
This will allow the flags to be shared between the old pass manager (PM) and new PM files.
I don't really like needing to expose these globally like this; maybe
it would be better to just move TargetPassConfig and the CodeGenPassBuilder
into one common file?
---
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 203 ++++++++----------
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 41 ++++
2 files changed, 133 insertions(+), 111 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index cad4585c5b3013..3409a49fe203f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -74,6 +74,7 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+using namespace llvm::AMDGPU;
namespace {
class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
@@ -186,109 +187,95 @@ static VGPRRegisterRegAlloc fastRegAllocVGPR(
"fast", "fast register allocator", createFastVGPRRegisterAllocator);
} // anonymous namespace
-static cl::opt<bool>
-EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
- cl::desc("Run early if-conversion"),
- cl::init(false));
+namespace llvm::AMDGPU {
+cl::opt<bool> EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
+ cl::desc("Run early if-conversion"),
+ cl::init(false));
-static cl::opt<bool>
-OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
- cl::desc("Run pre-RA exec mask optimizations"),
- cl::init(true));
+cl::opt<bool> OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
+ cl::desc("Run pre-RA exec mask optimizations"),
+ cl::init(true));
-static cl::opt<bool>
+cl::opt<bool>
LowerCtorDtor("amdgpu-lower-global-ctor-dtor",
cl::desc("Lower GPU ctor / dtors to globals on the device."),
cl::init(true), cl::Hidden);
// Option to disable vectorizer for tests.
-static cl::opt<bool> EnableLoadStoreVectorizer(
- "amdgpu-load-store-vectorizer",
- cl::desc("Enable load store vectorizer"),
- cl::init(true),
- cl::Hidden);
+cl::opt<bool>
+ EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer",
+ cl::desc("Enable load store vectorizer"),
+ cl::init(true), cl::Hidden);
// Option to control global loads scalarization
-static cl::opt<bool> ScalarizeGlobal(
- "amdgpu-scalarize-global-loads",
- cl::desc("Enable global load scalarization"),
- cl::init(true),
- cl::Hidden);
+cl::opt<bool> ScalarizeGlobal("amdgpu-scalarize-global-loads",
+ cl::desc("Enable global load scalarization"),
+ cl::init(true), cl::Hidden);
// Option to run internalize pass.
-static cl::opt<bool> InternalizeSymbols(
- "amdgpu-internalize-symbols",
- cl::desc("Enable elimination of non-kernel functions and unused globals"),
- cl::init(false),
- cl::Hidden);
+cl::opt<bool> InternalizeSymbols(
+ "amdgpu-internalize-symbols",
+ cl::desc("Enable elimination of non-kernel functions and unused globals"),
+ cl::init(false), cl::Hidden);
// Option to inline all early.
-static cl::opt<bool> EarlyInlineAll(
- "amdgpu-early-inline-all",
- cl::desc("Inline all functions early"),
- cl::init(false),
- cl::Hidden);
+cl::opt<bool> EarlyInlineAll("amdgpu-early-inline-all",
+ cl::desc("Inline all functions early"),
+ cl::init(false), cl::Hidden);
-static cl::opt<bool> RemoveIncompatibleFunctions(
+cl::opt<bool> RemoveIncompatibleFunctions(
"amdgpu-enable-remove-incompatible-functions", cl::Hidden,
cl::desc("Enable removal of functions when they"
"use features not supported by the target GPU"),
cl::init(true));
-static cl::opt<bool> EnableSDWAPeephole(
- "amdgpu-sdwa-peephole",
- cl::desc("Enable SDWA peepholer"),
- cl::init(true));
+cl::opt<bool> EnableSDWAPeephole("amdgpu-sdwa-peephole",
+ cl::desc("Enable SDWA peepholer"),
+ cl::init(true));
-static cl::opt<bool> EnableDPPCombine(
- "amdgpu-dpp-combine",
- cl::desc("Enable DPP combiner"),
- cl::init(true));
+cl::opt<bool> EnableDPPCombine("amdgpu-dpp-combine",
+ cl::desc("Enable DPP combiner"), cl::init(true));
// Enable address space based alias analysis
-static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
- cl::desc("Enable AMDGPU Alias Analysis"),
- cl::init(true));
+cl::opt<bool>
+ EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
+ cl::desc("Enable AMDGPU Alias Analysis"),
+ cl::init(true));
// Option to run late CFG structurizer
-static cl::opt<bool, true> LateCFGStructurize(
- "amdgpu-late-structurize",
- cl::desc("Enable late CFG structurization"),
- cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
- cl::Hidden);
+cl::opt<bool, true> LateCFGStructurize(
+ "amdgpu-late-structurize", cl::desc("Enable late CFG structurization"),
+ cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden);
// Disable structurizer-based control-flow lowering in order to test convergence
// control tokens. This should eventually be replaced by the wave-transform.
-static cl::opt<bool, true> DisableStructurizer(
+cl::opt<bool, true> DisableStructurizer(
"amdgpu-disable-structurizer",
cl::desc("Disable structurizer for experiments; produces unusable code"),
cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden);
// Enable lib calls simplifications
-static cl::opt<bool> EnableLibCallSimplify(
- "amdgpu-simplify-libcall",
- cl::desc("Enable amdgpu library simplifications"),
- cl::init(true),
- cl::Hidden);
-
-static cl::opt<bool> EnableLowerKernelArguments(
- "amdgpu-ir-lower-kernel-arguments",
- cl::desc("Lower kernel argument loads in IR pass"),
- cl::init(true),
- cl::Hidden);
-
-static cl::opt<bool> EnableRegReassign(
- "amdgpu-reassign-regs",
- cl::desc("Enable register reassign optimizations on gfx10+"),
- cl::init(true),
- cl::Hidden);
-
-static cl::opt<bool> OptVGPRLiveRange(
+cl::opt<bool>
+ EnableLibCallSimplify("amdgpu-simplify-libcall",
+ cl::desc("Enable amdgpu library simplifications"),
+ cl::init(true), cl::Hidden);
+
+cl::opt<bool> EnableLowerKernelArguments(
+ "amdgpu-ir-lower-kernel-arguments",
+ cl::desc("Lower kernel argument loads in IR pass"), cl::init(true),
+ cl::Hidden);
+
+cl::opt<bool> EnableRegReassign(
+ "amdgpu-reassign-regs",
+ cl::desc("Enable register reassign optimizations on gfx10+"),
+ cl::init(true), cl::Hidden);
+
+cl::opt<bool> OptVGPRLiveRange(
"amdgpu-opt-vgpr-liverange",
cl::desc("Enable VGPR liverange optimizations for if-else structure"),
cl::init(true), cl::Hidden);
-static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
+cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
"amdgpu-atomic-optimizer-strategy",
cl::desc("Select DPP or Iterative strategy for scan"),
cl::init(ScanOptions::Iterative),
@@ -299,91 +286,85 @@ static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")));
// Enable Mode register optimization
-static cl::opt<bool> EnableSIModeRegisterPass(
- "amdgpu-mode-register",
- cl::desc("Enable mode register pass"),
- cl::init(true),
- cl::Hidden);
+cl::opt<bool> EnableSIModeRegisterPass("amdgpu-mode-register",
+ cl::desc("Enable mode register pass"),
+ cl::init(true), cl::Hidden);
// Enable GFX11.5+ s_singleuse_vdst insertion
-static cl::opt<bool>
+cl::opt<bool>
EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst",
cl::desc("Enable s_singleuse_vdst insertion"),
cl::init(false), cl::Hidden);
// Enable GFX11+ s_delay_alu insertion
-static cl::opt<bool>
- EnableInsertDelayAlu("amdgpu-enable-delay-alu",
- cl::desc("Enable s_delay_alu insertion"),
- cl::init(true), cl::Hidden);
+cl::opt<bool> EnableInsertDelayAlu("amdgpu-enable-delay-alu",
+ cl::desc("Enable s_delay_alu insertion"),
+ cl::init(true), cl::Hidden);
// Enable GFX11+ VOPD
-static cl::opt<bool>
- EnableVOPD("amdgpu-enable-vopd",
- cl::desc("Enable VOPD, dual issue of VALU in wave32"),
- cl::init(true), cl::Hidden);
+cl::opt<bool> EnableVOPD("amdgpu-enable-vopd",
+ cl::desc("Enable VOPD, dual issue of VALU in wave32"),
+ cl::init(true), cl::Hidden);
// Option is used in lit tests to prevent deadcoding of patterns inspected.
-static cl::opt<bool>
-EnableDCEInRA("amdgpu-dce-in-ra",
- cl::init(true), cl::Hidden,
- cl::desc("Enable machine DCE inside regalloc"));
-
-static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
- cl::desc("Adjust wave priority"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool> EnableScalarIRPasses(
- "amdgpu-scalar-ir-passes",
- cl::desc("Enable scalar IR passes"),
- cl::init(true),
- cl::Hidden);
-
-static cl::opt<bool, true> EnableStructurizerWorkarounds(
+cl::opt<bool> EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden,
+ cl::desc("Enable machine DCE inside regalloc"));
+
+cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
+ cl::desc("Adjust wave priority"),
+ cl::init(false), cl::Hidden);
+
+cl::opt<bool> EnableScalarIRPasses("amdgpu-scalar-ir-passes",
+ cl::desc("Enable scalar IR passes"),
+ cl::init(true), cl::Hidden);
+
+cl::opt<bool, true> EnableStructurizerWorkarounds(
"amdgpu-enable-structurizer-workarounds",
cl::desc("Enable workarounds for the StructurizeCFG pass"),
cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds),
cl::init(true), cl::Hidden);
-static cl::opt<bool, true> EnableLowerModuleLDS(
+cl::opt<bool, true> EnableLowerModuleLDS(
"amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
cl::Hidden);
-static cl::opt<bool> EnablePreRAOptimizations(
- "amdgpu-enable-pre-ra-optimizations",
- cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
- cl::Hidden);
+cl::opt<bool>
+ EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations",
+ cl::desc("Enable Pre-RA optimizations pass"),
+ cl::init(true), cl::Hidden);
-static cl::opt<bool> EnablePromoteKernelArguments(
+cl::opt<bool> EnablePromoteKernelArguments(
"amdgpu-enable-promote-kernel-arguments",
cl::desc("Enable promotion of flat kernel pointer arguments to global"),
cl::Hidden, cl::init(true));
-static cl::opt<bool> EnableImageIntrinsicOptimizer(
+cl::opt<bool> EnableImageIntrinsicOptimizer(
"amdgpu-enable-image-intrinsic-optimizer",
cl::desc("Enable image intrinsic optimizer pass"), cl::init(true),
cl::Hidden);
-static cl::opt<bool>
+cl::opt<bool>
EnableLoopPrefetch("amdgpu-loop-prefetch",
cl::desc("Enable loop data prefetch on AMDGPU"),
cl::Hidden, cl::init(false));
-static cl::opt<bool> EnableMaxIlpSchedStrategy(
+cl::opt<bool> EnableMaxIlpSchedStrategy(
"amdgpu-enable-max-ilp-scheduling-strategy",
cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),
cl::Hidden, cl::init(false));
-static cl::opt<bool> EnableRewritePartialRegUses(
+cl::opt<bool> EnableRewritePartialRegUses(
"amdgpu-enable-rewrite-partial-reg-uses",
cl::desc("Enable rewrite partial reg uses pass"), cl::init(true),
cl::Hidden);
-static cl::opt<bool> EnableHipStdPar(
- "amdgpu-enable-hipstdpar",
- cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
- cl::Hidden);
+cl::opt<bool>
+ EnableHipStdPar("amdgpu-enable-hipstdpar",
+ cl::desc("Enable HIP Standard Parallelism Offload support"),
+ cl::init(false), cl::Hidden);
+
+} // namespace llvm::AMDGPU
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 4d39ad2b415052..f01e26a846f433 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -16,12 +16,53 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include <optional>
#include <utility>
namespace llvm {
+enum class ScanOptions;
+
+namespace AMDGPU {
+
+extern cl::opt<bool> EnableEarlyIfConversion;
+extern cl::opt<bool> OptExecMaskPreRA;
+extern cl::opt<bool> LowerCtorDtor;
+extern cl::opt<bool> EnableLoadStoreVectorizer;
+extern cl::opt<bool> ScalarizeGlobal;
+extern cl::opt<bool> InternalizeSymbols;
+extern cl::opt<bool> EarlyInlineAll;
+extern cl::opt<bool> RemoveIncompatibleFunctions;
+extern cl::opt<bool> EnableSDWAPeephole;
+extern cl::opt<bool> EnableDPPCombine;
+extern cl::opt<bool> EnableAMDGPUAliasAnalysis;
+extern cl::opt<bool, true> LateCFGStructurize;
+extern cl::opt<bool, true> DisableStructurizer;
+extern cl::opt<bool> EnableLibCallSimplify;
+extern cl::opt<bool> EnableLowerKernelArguments;
+extern cl::opt<bool> EnableRegReassign;
+extern cl::opt<bool> OptVGPRLiveRange;
+extern cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy;
+extern cl::opt<bool> EnableSIModeRegisterPass;
+extern cl::opt<bool> EnableInsertSingleUseVDST;
+extern cl::opt<bool> EnableInsertDelayAlu;
+extern cl::opt<bool> EnableVOPD;
+extern cl::opt<bool> EnableDCEInRA;
+extern cl::opt<bool> EnableSetWavePriority;
+extern cl::opt<bool> EnableScalarIRPasses;
+extern cl::opt<bool, true> EnableStructurizerWorkarounds;
+extern cl::opt<bool, true> EnableLowerModuleLDS;
+extern cl::opt<bool> EnablePreRAOptimizations;
+extern cl::opt<bool> EnablePromoteKernelArguments;
+extern cl::opt<bool> EnableImageIntrinsicOptimizer;
+extern cl::opt<bool> EnableLoopPrefetch;
+extern cl::opt<bool> EnableMaxIlpSchedStrategy;
+extern cl::opt<bool> EnableRewritePartialRegUses;
+extern cl::opt<bool> EnableHipStdPar;
+} // namespace AMDGPU
+
//===----------------------------------------------------------------------===//
// AMDGPU Target Machine (R600+)
//===----------------------------------------------------------------------===//
More information about the llvm-branch-commits mailing list