[llvm] [AMDGPU] Graph-based Module Splitting Rewrite (PR #104763)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 19 07:05:13 PDT 2024
================
@@ -44,187 +47,152 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/SHA256.h"
-#include "llvm/Support/Threading.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <algorithm>
#include <cassert>
+#include <cmath>
#include <iterator>
#include <memory>
#include <utility>
#include <vector>
-using namespace llvm;
+#ifndef NDEBUG
+#include "llvm/Support/LockFileManager.h"
+#endif
#define DEBUG_TYPE "amdgpu-split-module"
+namespace llvm {
namespace {
+static cl::opt<unsigned> MaxDepth(
+ "amdgpu-module-splitting-max-depth",
+ cl::desc(
+ "maximum search depth. 0 forces a greedy approach. "
+ "warning: the algorithm is up to O(2^N), where N is the max depth."),
+ cl::init(8));
+
static cl::opt<float> LargeFnFactor(
- "amdgpu-module-splitting-large-function-threshold", cl::init(2.0f),
- cl::Hidden,
+ "amdgpu-module-splitting-large-threshold", cl::init(2.0f), cl::Hidden,
cl::desc(
- "consider a function as large and needing special treatment when the "
- "cost of importing it into a partition"
- "exceeds the average cost of a partition by this factor; e;g. 2.0 "
- "means if the function and its dependencies is 2 times bigger than "
- "an average partition; 0 disables large functions handling entirely"));
+ "when max depth is reached and we can no longer branch out, this "
+ "value determines if a function is worth merging into an already "
+ "existing partition to reduce code duplication. This is a factor "
+ "of the ideal partition size, e.g. 2.0 means we consider the "
+ "function for merging if its cost (including its callees) is 2x the "
+ "size of an ideal partition."));
static cl::opt<float> LargeFnOverlapForMerge(
- "amdgpu-module-splitting-large-function-merge-overlap", cl::init(0.8f),
- cl::Hidden,
- cl::desc(
- "defines how much overlap between two large function's dependencies "
- "is needed to put them in the same partition"));
+ "amdgpu-module-splitting-merge-threshold", cl::init(0.7f), cl::Hidden,
+ cl::desc("when a function is considered for merging into a partition that "
+ "already contains some of its callees, do the merge if at least "
+ "n% of the code it can reach is already present inside the "
+ "partition; e.g. 0.7 means only merge >70%"));
static cl::opt<bool> NoExternalizeGlobals(
"amdgpu-module-splitting-no-externalize-globals", cl::Hidden,
cl::desc("disables externalization of global variable with local linkage; "
"may cause globals to be duplicated which increases binary size"));
static cl::opt<std::string>
- LogDirOpt("amdgpu-module-splitting-log-dir", cl::Hidden,
- cl::desc("output directory for AMDGPU module splitting logs"));
+ ModuleDotCfgOutput("amdgpu-module-splitting-print-module-dotcfg",
+ cl::Hidden,
+ cl::desc("output file to write out the dotgraph "
+ "representation of the input module"));
+
+static cl::opt<std::string> PartitionSummariesOutput(
+ "amdgpu-module-splitting-print-partition-summaries", cl::Hidden,
+ cl::desc("output file to write out a summary of "
+ "the partitions created for each module"));
+
+#ifndef NDEBUG
+static cl::opt<bool> TimeBuild("amdgpu-module-splitting-time-trace", cl::Hidden,
+ cl::desc("enable and print timers"));
+
+static cl::opt<bool>
+ UseLockFile("amdgpu-module-splitting-serial-execution", cl::Hidden,
+ cl::desc("use a lock file so only one process in the system "
+ "can run this pass at once. useful to avoid mangled "
+ "debug output in multithreaded environments."));
static cl::opt<bool>
- LogPrivate("amdgpu-module-splitting-log-private", cl::Hidden,
- cl::desc("hash value names before printing them in the AMDGPU "
- "module splitting logs"));
+ DebugProposalSearch("amdgpu-module-splitting-debug-proposal-search",
+ cl::Hidden,
+ cl::desc("print all proposals received and whether "
+ "they were rejected or accepted"));
+
+struct SplitModuleTimer : NamedRegionTimer { // variant used when NDEBUG is not defined
+ SplitModuleTimer(StringRef Name, StringRef Desc)
+ : NamedRegionTimer(Name, Desc, DEBUG_TYPE, "AMDGPU Module Splitting",
+ TimeBuild) {} // TimeBuild is the -amdgpu-module-splitting-time-trace option
+};
+#else // NDEBUG is defined
+struct SplitModuleTimer { // stand-in with the same constructor signature
+ SplitModuleTimer(StringRef Name, StringRef Desc) {} // empty body: both arguments are ignored
+};
+#endif // NDEBUG
+
+//===----------------------------------------------------------------------===//
+// Utils
+//===----------------------------------------------------------------------===//
using CostType = InstructionCost::CostType;
-using PartitionID = unsigned;
+using FunctionsCostMap = DenseMap<const Function *, CostType>;
using GetTTIFn = function_ref<const TargetTransformInfo &(Function &)>;
+static constexpr unsigned InvalidPID = -1;
+
+/// \param Num numerator
+/// \param Dem denominator; it is not checked against zero here
+/// \param FmtString printf-like format string applied to a single double
+/// \returns a printable object that prints (Num/Dem) * 100 using FmtString.
+static auto formatRatioOf(CostType Num, CostType Dem,
+ const char *FmtString = "%0.2f") {
+ return format(FmtString, (double(Num) / Dem) * 100);
+}
-static bool isEntryPoint(const Function *F) {
+static bool isKernel(const Function *F) {
return AMDGPU::isEntryFunctionCC(F->getCallingConv());
}
-static std::string getName(const Value &V) {
- static bool HideNames;
-
- static llvm::once_flag HideNameInitFlag;
- llvm::call_once(HideNameInitFlag, [&]() {
- if (LogPrivate.getNumOccurrences())
- HideNames = LogPrivate;
- else {
- const auto EV = sys::Process::GetEnv("AMD_SPLIT_MODULE_LOG_PRIVATE");
- HideNames = (EV.value_or("0") != "0");
- }
- });
-
- if (!HideNames)
- return V.getName().str();
- return toHex(SHA256::hash(arrayRefFromStringRef(V.getName())),
- /*LowerCase=*/true);
+static bool isNonCopyable(const Function &F) {
+ return isKernel(&F) || F.hasExternalLinkage() || !F.isDefinitionExact();
}
-/// Main logging helper.
-///
-/// Logging can be configured by the following environment variable.
-/// AMD_SPLIT_MODULE_LOG_DIR=<filepath>
-/// If set, uses <filepath> as the directory to write logfiles to
-/// each time module splitting is used.
-/// AMD_SPLIT_MODULE_LOG_PRIVATE
-/// If set to anything other than zero, all names are hidden.
-///
-/// Both environment variables have corresponding CL options which
-/// takes priority over them.
-///
-/// Any output printed to the log files is also printed to dbgs() when -debug is
-/// used and LLVM_DEBUG is defined.
-///
-/// This approach has a small disadvantage over LLVM_DEBUG though: logging logic
-/// cannot be removed from the code (by building without debug). This probably
-/// has a small performance cost because if some computation/formatting is
-/// needed for logging purpose, it may be done everytime only to be ignored
-/// by the logger.
-///
-/// As this pass only runs once and is not doing anything computationally
-/// expensive, this is likely a reasonable trade-off.
-///
-/// If some computation should really be avoided when unused, users of the class
-/// can check whether any logging will occur by using the bool operator.
-///
-/// \code
-/// if (SML) {
-/// // Executes only if logging to a file or if -debug is available and
-/// used.
-/// }
-/// \endcode
-class SplitModuleLogger {
-public:
- SplitModuleLogger(const Module &M) {
- std::string LogDir = LogDirOpt;
- if (LogDir.empty())
- LogDir = sys::Process::GetEnv("AMD_SPLIT_MODULE_LOG_DIR").value_or("");
-
- // No log dir specified means we don't need to log to a file.
- // We may still log to dbgs(), though.
- if (LogDir.empty())
- return;
-
- // If a log directory is specified, create a new file with a unique name in
- // that directory.
- int Fd;
- SmallString<0> PathTemplate;
- SmallString<0> RealPath;
- sys::path::append(PathTemplate, LogDir, "Module-%%-%%-%%-%%-%%-%%-%%.txt");
- if (auto Err =
- sys::fs::createUniqueFile(PathTemplate.str(), Fd, RealPath)) {
- report_fatal_error("Failed to create log file at '" + Twine(LogDir) +
- "': " + Err.message(),
- /*CrashDiag=*/false);
- }
-
- FileOS = std::make_unique<raw_fd_ostream>(Fd, /*shouldClose=*/true);
- }
-
- bool hasLogFile() const { return FileOS != nullptr; }
-
- raw_ostream &logfile() {
- assert(FileOS && "no logfile!");
- return *FileOS;
- }
-
- /// \returns true if this SML will log anything either to a file or dbgs().
- /// Can be used to avoid expensive computations that are ignored when logging
- /// is disabled.
- operator bool() const {
- return hasLogFile() || (DebugFlag && isCurrentDebugType(DEBUG_TYPE));
+/// If \p GV has local linkage, make it external + hidden.
+static void externalize(GlobalValue &GV) {
+ if (GV.hasLocalLinkage()) {
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ GV.setVisibility(GlobalValue::HiddenVisibility);
}
-private:
- std::unique_ptr<raw_fd_ostream> FileOS;
-};
-
-template <typename Ty>
-static SplitModuleLogger &operator<<(SplitModuleLogger &SML, const Ty &Val) {
- static_assert(
- !std::is_same_v<Ty, Value>,
- "do not print values to logs directly, use handleName instead!");
- LLVM_DEBUG(dbgs() << Val);
- if (SML.hasLogFile())
- SML.logfile() << Val;
- return SML;
+ // Unnamed entities must be named consistently between modules. setName will
+ // give a distinct name to each such entity.
+ if (!GV.hasName())
+ GV.setName("__llvmsplit_unnamed");
----------------
arsenm wrote:
Use a period in the name, since names containing a period cannot conflict with C identifiers?
https://github.com/llvm/llvm-project/pull/104763
More information about the llvm-commits mailing list