[llvm] [AMDGPU] Graph-based Module Splitting Rewrite (PR #104763)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 11:12:30 PDT 2024
================
@@ -44,187 +47,147 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/SHA256.h"
-#include "llvm/Support/Threading.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <algorithm>
#include <cassert>
+#include <cmath>
#include <iterator>
#include <memory>
#include <utility>
#include <vector>
-using namespace llvm;
+#ifndef NDEBUG
+#include "llvm/Support/LockFileManager.h"
+#endif
#define DEBUG_TYPE "amdgpu-split-module"
+namespace llvm {
namespace {
+static cl::opt<unsigned> MaxDepth(
+ "amdgpu-module-splitting-max-depth",
+ cl::desc(
+ "maximum search depth. 0 forces a greedy approach. "
+ "warning: the algorithm is up to O(2^N), where N is the max depth."),
+ cl::init(8));
+
static cl::opt<float> LargeFnFactor(
- "amdgpu-module-splitting-large-function-threshold", cl::init(2.0f),
- cl::Hidden,
+ "amdgpu-module-splitting-large-threshold", cl::init(2.0f), cl::Hidden,
cl::desc(
- "consider a function as large and needing special treatment when the "
- "cost of importing it into a partition"
- "exceeds the average cost of a partition by this factor; e;g. 2.0 "
- "means if the function and its dependencies is 2 times bigger than "
- "an average partition; 0 disables large functions handling entirely"));
+ "when max depth is reached and we can no longer branch out, this "
+ "value determines if a function is worth merging into an already "
+ "existing partition to reduce code duplication. This is a factor "
+ "of the ideal partition size, e.g. 2.0 means we consider the "
+ "function for merging if its cost (including its callees) is 2x the "
+ "size of an ideal partition."));
static cl::opt<float> LargeFnOverlapForMerge(
- "amdgpu-module-splitting-large-function-merge-overlap", cl::init(0.8f),
- cl::Hidden,
- cl::desc(
- "defines how much overlap between two large function's dependencies "
- "is needed to put them in the same partition"));
+ "amdgpu-module-splitting-merge-threshold", cl::init(0.7f), cl::Hidden,
+ cl::desc("when a function is considered for merging into a partition that "
+ "already contains some of its callees, do the merge if at least "
+ "n% of the code it can reach is already present inside the "
+ "partition; e.g. 0.7 means only merge >70%"));
static cl::opt<bool> NoExternalizeGlobals(
"amdgpu-module-splitting-no-externalize-globals", cl::Hidden,
cl::desc("disables externalization of global variable with local linkage; "
"may cause globals to be duplicated which increases binary size"));
static cl::opt<std::string>
- LogDirOpt("amdgpu-module-splitting-log-dir", cl::Hidden,
- cl::desc("output directory for AMDGPU module splitting logs"));
+ ModuleDotCfgOutput("amdgpu-module-splitting-print-module-dotcfg",
+ cl::Hidden,
+ cl::desc("output file to write out the dotgraph "
+ "representation of the input module"));
+
+static cl::opt<std::string> PartitionSummariesOutput(
+ "amdgpu-module-splitting-print-partition-summaries", cl::Hidden,
+ cl::desc("output file to write out a summary of "
+ "the partitions created for each module"));
+
+#ifndef NDEBUG
+static cl::opt<bool> TimeBuild("amdgpu-module-splitting-time-trace", cl::Hidden,
+ cl::desc("enable and print timers"));
+
+static cl::opt<bool>
+ UseLockFile("amdgpu-module-splitting-serial-execution", cl::Hidden,
+ cl::desc("use a lock file so only one process in the system "
+ "can run this pass at once. useful to avoid mangled "
+ "debug output in multithreaded environments."));
static cl::opt<bool>
- LogPrivate("amdgpu-module-splitting-log-private", cl::Hidden,
- cl::desc("hash value names before printing them in the AMDGPU "
- "module splitting logs"));
+ DebugProposalSearch("amdgpu-module-splitting-debug-proposal-search",
+ cl::Hidden,
+ cl::desc("print all proposals received and whether "
+ "they were rejected or accepted"));
+
+struct SplitModuleTimer : NamedRegionTimer {
+ SplitModuleTimer(StringRef Name, StringRef Desc)
+ : NamedRegionTimer(Name, Desc, DEBUG_TYPE, "AMDGPU Module Splitting",
+ TimeBuild) {}
+};
+#else
+struct SplitModuleTimer {
+ SplitModuleTimer(StringRef Name, StringRef Desc) {}
+};
+#endif
+
+//===----------------------------------------------------------------------===//
+// Utils
+//===----------------------------------------------------------------------===//
using CostType = InstructionCost::CostType;
-using PartitionID = unsigned;
+using FunctionsCostMap = DenseMap<const Function *, CostType>;
using GetTTIFn = function_ref<const TargetTransformInfo &(Function &)>;
+static constexpr unsigned InvalidPID = -1;
-static bool isEntryPoint(const Function *F) {
- return AMDGPU::isEntryFunctionCC(F->getCallingConv());
+/// \param Num numerator
+/// \param Dem denominator
+/// \returns a printable object to print (Num/Dem) as a percentage using "%0.2f".
+static auto formatRatioOf(CostType Num, CostType Dem) {
+ return format("%0.2f", (static_cast<double>(Num) / Dem) * 100);
}
-static std::string getName(const Value &V) {
- static bool HideNames;
-
- static llvm::once_flag HideNameInitFlag;
- llvm::call_once(HideNameInitFlag, [&]() {
- if (LogPrivate.getNumOccurrences())
- HideNames = LogPrivate;
- else {
- const auto EV = sys::Process::GetEnv("AMD_SPLIT_MODULE_LOG_PRIVATE");
- HideNames = (EV.value_or("0") != "0");
- }
- });
-
- if (!HideNames)
- return V.getName().str();
- return toHex(SHA256::hash(arrayRefFromStringRef(V.getName())),
- /*LowerCase=*/true);
+static bool isNonCopyable(const Function &F) {
----------------
arsenm wrote:
Document what non-copyable is?
https://github.com/llvm/llvm-project/pull/104763
More information about the llvm-commits mailing list