[llvm] [tools] LLVM Advisor - optimization analysis and performance guidance tool (PR #147451)
Miguel Cárdenas via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 12 21:49:38 PDT 2025
https://github.com/miguelcsx updated https://github.com/llvm/llvm-project/pull/147451
>From 429ae143a09775ea9e5ab8854b9b695edd7cf63f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Tue, 8 Jul 2025 04:41:10 +0200
Subject: [PATCH 1/9] [llvm-advisor] add initial project structure and
configuration
The AdvisorConfig class provides JSON-based configuration loading
with file classification patterns and output directory management.
---
llvm/tools/llvm-advisor/CMakeLists.txt | 15 +++++
llvm/tools/llvm-advisor/config/config.json | 7 ++
llvm/tools/llvm-advisor/src/CMakeLists.txt | 35 ++++++++++
.../llvm-advisor/src/Config/AdvisorConfig.cpp | 64 +++++++++++++++++++
.../llvm-advisor/src/Config/AdvisorConfig.h | 41 ++++++++++++
5 files changed, 162 insertions(+)
create mode 100644 llvm/tools/llvm-advisor/CMakeLists.txt
create mode 100644 llvm/tools/llvm-advisor/config/config.json
create mode 100644 llvm/tools/llvm-advisor/src/CMakeLists.txt
create mode 100644 llvm/tools/llvm-advisor/src/Config/AdvisorConfig.cpp
create mode 100644 llvm/tools/llvm-advisor/src/Config/AdvisorConfig.h
diff --git a/llvm/tools/llvm-advisor/CMakeLists.txt b/llvm/tools/llvm-advisor/CMakeLists.txt
new file mode 100644
index 0000000000000..d2389bdd1e0fa
--- /dev/null
+++ b/llvm/tools/llvm-advisor/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Build configuration for the llvm-advisor tool directory.
+cmake_minimum_required(VERSION 3.18)
+
+# Enable the tool by default and record its executable name.
+# NOTE(review): presumably these variables are consumed by LLVM's tool build
+# machinery -- confirm against the parent CMake files.
+set(LLVM_TOOL_LLVM_ADVISOR_BUILD_DEFAULT ON)
+set(LLVM_REQUIRE_EXE_NAMES llvm-advisor)
+
+# The llvm-advisor target is declared in src/CMakeLists.txt.
+add_subdirectory(src)
+
+# Set the executable name
+set_target_properties(llvm-advisor PROPERTIES
+ OUTPUT_NAME llvm-advisor)
+
+# Install the binary
+# NOTE(review): add_llvm_tool() in src/ may already register an install rule
+# for this target; confirm this explicit rule is not a duplicate.
+install(TARGETS llvm-advisor
+ RUNTIME DESTINATION bin
+ COMPONENT llvm-advisor)
diff --git a/llvm/tools/llvm-advisor/config/config.json b/llvm/tools/llvm-advisor/config/config.json
new file mode 100644
index 0000000000000..9e94a41ff46c4
--- /dev/null
+++ b/llvm/tools/llvm-advisor/config/config.json
@@ -0,0 +1,7 @@
+{
+ "outputDir": ".llvm-advisor",
+ "verbose": false,
+ "keepTemps": false,
+ "runProfiler": true,
+ "timeout": 60
+}
diff --git a/llvm/tools/llvm-advisor/src/CMakeLists.txt b/llvm/tools/llvm-advisor/src/CMakeLists.txt
new file mode 100644
index 0000000000000..81088f8231625
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/CMakeLists.txt
@@ -0,0 +1,35 @@
+# Gather all .cpp sources in this directory tree
+# CONFIGURE_DEPENDS asks CMake to re-check the glob at build time so newly
+# added files are picked up.
+# NOTE(review): an explicit source list may be preferred by the project's
+# conventions -- confirm with maintainers.
+file(GLOB_RECURSE LLVM_ADVISOR_SOURCES CONFIGURE_DEPENDS
+ ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
+)
+
+# Define the executable target
+add_llvm_tool(llvm-advisor
+ ${LLVM_ADVISOR_SOURCES}
+)
+
+# Link required LLVM libraries
+target_link_libraries(llvm-advisor PRIVATE
+ LLVMSupport
+ LLVMCore
+ LLVMIRReader
+ LLVMBitWriter
+ LLVMRemarks
+ LLVMProfileData
+)
+
+# Set include directories
+# Adds the src/ directory itself as a private include root for the tool.
+target_include_directories(llvm-advisor PRIVATE
+ ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
+# Install the Python view module alongside the binary
+# Only files matching the patterns below are copied from ../view/.
+# NOTE(review): the view/ directory is not visible in this part of the patch
+# series; confirm it exists before this install rule lands.
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../view/
+ DESTINATION ${CMAKE_INSTALL_BINDIR}/view
+ FILES_MATCHING
+ PATTERN "*.py"
+ PATTERN "*.html"
+ PATTERN "*.css"
+ PATTERN "*.js"
+ PATTERN "*.md"
+)
diff --git a/llvm/tools/llvm-advisor/src/Config/AdvisorConfig.cpp b/llvm/tools/llvm-advisor/src/Config/AdvisorConfig.cpp
new file mode 100644
index 0000000000000..69f1e3d52702e
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Config/AdvisorConfig.cpp
@@ -0,0 +1,64 @@
+#include "AdvisorConfig.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+namespace advisor {
+
+/// Construct a configuration whose output directory is the relative path
+/// ".llvm-advisor"; all other settings keep their in-class defaults.
+// The relative default is expected to be resolved later by CompilationManager.
+AdvisorConfig::AdvisorConfig() : OutputDir_(".llvm-advisor") {}
+
+/// Load settings from the JSON file at \p path.
+///
+/// Recognised top-level keys are "outputDir" (string), "verbose", "keepTemps"
+/// and "runProfiler" (booleans) and "timeout" (integer seconds). A key that is
+/// absent, or present with a different JSON type, leaves the corresponding
+/// setting untouched.
+///
+/// \returns true on success. An Error is returned when the file cannot be
+/// read, is not valid JSON, its top-level value is not an object, or "timeout"
+/// is negative or does not fit in an int. No setting is modified when an
+/// Error is returned.
+Expected<bool> AdvisorConfig::loadFromFile(const std::string &path) {
+  auto BufferOrError = MemoryBuffer::getFile(path);
+  if (!BufferOrError) {
+    return createStringError(BufferOrError.getError(),
+                             "Cannot read config file: " + path);
+  }
+
+  auto Buffer = std::move(*BufferOrError);
+  Expected<json::Value> JsonOrError = json::parse(Buffer->getBuffer());
+  if (!JsonOrError) {
+    return JsonOrError.takeError();
+  }
+
+  auto &Json = *JsonOrError;
+  auto *Obj = Json.getAsObject();
+  if (!Obj) {
+    return createStringError(std::make_error_code(std::errc::invalid_argument),
+                             "Config file must contain JSON object");
+  }
+
+  // Validate before mutating anything so a bad file cannot leave the
+  // configuration half-updated. The JSON integer is 64-bit; a value that is
+  // negative or does not round-trip through int would otherwise wrap silently.
+  auto timeoutOpt = Obj->getInteger("timeout");
+  if (timeoutOpt) {
+    const bool fitsInInt =
+        static_cast<int64_t>(static_cast<int>(*timeoutOpt)) == *timeoutOpt;
+    if (*timeoutOpt < 0 || !fitsInInt) {
+      return createStringError(
+          std::make_error_code(std::errc::invalid_argument),
+          "Config 'timeout' must be a non-negative number of seconds that "
+          "fits in an int");
+    }
+  }
+
+  if (auto outputDirOpt = Obj->getString("outputDir"); outputDirOpt) {
+    OutputDir_ = outputDirOpt->str();
+  }
+
+  if (auto verboseOpt = Obj->getBoolean("verbose"); verboseOpt) {
+    Verbose_ = *verboseOpt;
+  }
+
+  if (auto keepTempsOpt = Obj->getBoolean("keepTemps"); keepTempsOpt) {
+    KeepTemps_ = *keepTempsOpt;
+  }
+
+  if (auto runProfileOpt = Obj->getBoolean("runProfiler"); runProfileOpt) {
+    RunProfiler_ = *runProfileOpt;
+  }
+
+  if (timeoutOpt) {
+    TimeoutSeconds_ = static_cast<int>(*timeoutOpt);
+  }
+
+  return true;
+}
+
+/// Return the command used to invoke \p tool.
+/// Currently this is \p tool unchanged, so lookup is left to whoever runs it.
+std::string AdvisorConfig::getToolPath(const std::string &tool) const {
+ // For now, just return the tool name and rely on PATH
+ return tool;
+}
+
+} // namespace advisor
+} // namespace llvm
diff --git a/llvm/tools/llvm-advisor/src/Config/AdvisorConfig.h b/llvm/tools/llvm-advisor/src/Config/AdvisorConfig.h
new file mode 100644
index 0000000000000..b7f553fddbb23
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Config/AdvisorConfig.h
@@ -0,0 +1,41 @@
+#ifndef LLVM_ADVISOR_CONFIG_H
+#define LLVM_ADVISOR_CONFIG_H
+
+#include "llvm/Support/Error.h"
+#include <string>
+
+namespace llvm {
+namespace advisor {
+
+/// Runtime settings for llvm-advisor, optionally overridden from a JSON file.
+class AdvisorConfig {
+public:
+ /// Sets the output directory to the relative path ".llvm-advisor".
+ AdvisorConfig();
+
+ /// Read settings from the JSON file at \p path; keys that are missing keep
+ /// their current value. Returns an Error if the file cannot be read or
+ /// parsed, or if its top-level value is not a JSON object.
+ Expected<bool> loadFromFile(const std::string &path);
+
+ // Setters: overwrite a single setting.
+ void setOutputDir(const std::string &dir) { OutputDir_ = dir; }
+ void setVerbose(bool verbose) { Verbose_ = verbose; }
+ void setKeepTemps(bool keep) { KeepTemps_ = keep; }
+ void setRunProfiler(bool run) { RunProfiler_ = run; }
+ void setTimeout(int seconds) { TimeoutSeconds_ = seconds; }
+
+ // Getters: read a single setting.
+ const std::string &getOutputDir() const { return OutputDir_; }
+ bool getVerbose() const { return Verbose_; }
+ bool getKeepTemps() const { return KeepTemps_; }
+ bool getRunProfiler() const { return RunProfiler_; }
+ int getTimeout() const { return TimeoutSeconds_; }
+
+ /// Return the command for \p tool; currently the tool name unchanged.
+ std::string getToolPath(const std::string &tool) const;
+
+private:
+ std::string OutputDir_; // assigned in the constructor
+ bool Verbose_ = false;
+ bool KeepTemps_ = false;
+ bool RunProfiler_ = true;
+ int TimeoutSeconds_ = 60; // seconds
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
>From 65ce6d57cb17abfe0226eb847dcfc98fe04518ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Sun, 13 Jul 2025 05:51:00 +0200
Subject: [PATCH 2/9] [llvm-advisor] Add utility for file and process
management
- Add FileManager for file operations.
- Add FileClassifier for compilation artifact categorization.
- Add ProcessRunner for subprocess execution.
---
.../llvm-advisor/src/Utils/FileClassifier.cpp | 136 ++++++++++++
.../llvm-advisor/src/Utils/FileClassifier.h | 26 +++
.../llvm-advisor/src/Utils/FileManager.cpp | 205 ++++++++++++++++++
.../llvm-advisor/src/Utils/FileManager.h | 46 ++++
.../llvm-advisor/src/Utils/ProcessRunner.cpp | 69 ++++++
.../llvm-advisor/src/Utils/ProcessRunner.h | 32 +++
6 files changed, 514 insertions(+)
create mode 100644 llvm/tools/llvm-advisor/src/Utils/FileClassifier.cpp
create mode 100644 llvm/tools/llvm-advisor/src/Utils/FileClassifier.h
create mode 100644 llvm/tools/llvm-advisor/src/Utils/FileManager.cpp
create mode 100644 llvm/tools/llvm-advisor/src/Utils/FileManager.h
create mode 100644 llvm/tools/llvm-advisor/src/Utils/ProcessRunner.cpp
create mode 100644 llvm/tools/llvm-advisor/src/Utils/ProcessRunner.h
diff --git a/llvm/tools/llvm-advisor/src/Utils/FileClassifier.cpp b/llvm/tools/llvm-advisor/src/Utils/FileClassifier.cpp
new file mode 100644
index 0000000000000..e9b39f984c771
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Utils/FileClassifier.cpp
@@ -0,0 +1,136 @@
+#include "FileClassifier.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+namespace advisor {
+
+/// Classify \p filePath by its extension and by substrings of its file name.
+///
+/// Rules are evaluated top to bottom and the first match wins. Every
+/// recognised artifact is reported as generated and non-temporary; a file
+/// that matches no rule gets category "unknown" with isGenerated == false.
+FileClassification
+FileClassifier::classifyFile(const std::string &filePath) const {
+  const StringRef name = sys::path::filename(filePath);
+  const StringRef ext = sys::path::extension(filePath);
+
+  // Build the result shared by every recognised artifact kind.
+  auto generated = [](const char *category, const char *description) {
+    FileClassification result;
+    result.isGenerated = true;
+    result.isTemporary = false;
+    result.category = category;
+    result.description = description;
+    return result;
+  };
+
+  // LLVM IR files
+  if (ext == ".ll")
+    return generated("ir", "LLVM IR text");
+
+  // Assembly files
+  if (ext == ".s" || ext == ".S")
+    return generated("assembly", "Assembly");
+
+  // Optimization remarks
+  if (name.ends_with(".opt.yaml") || name.ends_with(".opt.yml"))
+    return generated("remarks", "Optimization remarks");
+
+  // Preprocessed files
+  if (ext == ".i" || ext == ".ii")
+    return generated("preprocessed", "Preprocessed source");
+
+  // AST dumps
+  if (ext == ".ast" || name.contains("ast-dump"))
+    return generated("ast", "AST dump");
+
+  // Profile data
+  if (ext == ".profraw" || ext == ".profdata")
+    return generated("profile", "Profile data");
+
+  // Include trees
+  if (name.contains(".include.") || name.contains("include-tree"))
+    return generated("include-tree", "Include tree");
+
+  // Debug info
+  if (name.contains("debug") || name.contains("dwarf"))
+    return generated("debug", "Debug information");
+
+  // Static analyzer output
+  if (name.contains("analysis") || name.contains("analyzer"))
+    return generated("static-analyzer", "Static analyzer output");
+
+  // Macro expansion
+  if (name.contains("macro-expanded"))
+    return generated("macro-expansion", "Macro expansion");
+
+  // Compilation phases
+  if (name.contains("phases"))
+    return generated("compilation-phases", "Compilation phases");
+
+  // Control flow graph
+  if (ext == ".dot" || name.contains("cfg"))
+    return generated("cfg", "Control flow graph");
+
+  // Template instantiation
+  if (name.contains("template") || name.contains("instantiation"))
+    return generated("template-instantiation", "Template instantiation");
+
+  // Nothing matched: report an unknown, non-generated file.
+  FileClassification unknown = generated("unknown", "Unknown file type");
+  unknown.isGenerated = false;
+  return unknown;
+}
+
+/// Return true when \p filePath classifies as a known, generated,
+/// non-temporary artifact.
+bool FileClassifier::shouldCollect(const std::string &filePath) const {
+  const FileClassification verdict = classifyFile(filePath);
+  if (verdict.category == "unknown")
+    return false;
+  if (!verdict.isGenerated)
+    return false;
+  return !verdict.isTemporary;
+}
+
+/// Map the extension of \p filePath to "C", "C++" or "Header".
+/// Any other extension yields "Unknown". The comparison is case-sensitive,
+/// so ".C" is C++ while ".c" is C.
+std::string FileClassifier::getLanguage(const std::string &filePath) const {
+  const StringRef ext = sys::path::extension(filePath);
+
+  const char *const cxxExtensions[] = {".cpp", ".cc", ".cxx", ".C"};
+  const char *const headerExtensions[] = {".h", ".hpp", ".hh", ".hxx"};
+
+  if (ext == ".c")
+    return "C";
+
+  for (const char *candidate : cxxExtensions) {
+    if (ext == candidate)
+      return "C++";
+  }
+
+  for (const char *candidate : headerExtensions) {
+    if (ext == candidate)
+      return "Header";
+  }
+
+  return "Unknown";
+}
+
+} // namespace advisor
+} // namespace llvm
diff --git a/llvm/tools/llvm-advisor/src/Utils/FileClassifier.h b/llvm/tools/llvm-advisor/src/Utils/FileClassifier.h
new file mode 100644
index 0000000000000..6bf7c43ba4ffc
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Utils/FileClassifier.h
@@ -0,0 +1,26 @@
+#ifndef LLVM_ADVISOR_FILE_CLASSIFIER_H
+#define LLVM_ADVISOR_FILE_CLASSIFIER_H
+
+#include <string>
+
+namespace llvm {
+namespace advisor {
+
+/// Result of classifying one file.
+struct FileClassification {
+ std::string category; // short machine-readable tag, e.g. "ir" or "unknown"
+ std::string description; // human-readable label for the category
+ bool isTemporary = false;
+ bool isGenerated = true;
+};
+
+/// Classifies files by extension and file-name substrings.
+class FileClassifier {
+public:
+ /// Classify \p filePath; unmatched files get category "unknown".
+ FileClassification classifyFile(const std::string &filePath) const;
+ /// True when the file is a known, generated, non-temporary artifact.
+ bool shouldCollect(const std::string &filePath) const;
+ /// Returns "C", "C++", "Header" or "Unknown" based on the extension.
+ std::string getLanguage(const std::string &filePath) const;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-advisor/src/Utils/FileManager.cpp b/llvm/tools/llvm-advisor/src/Utils/FileManager.cpp
new file mode 100644
index 0000000000000..7083d7edb7f3d
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Utils/FileManager.cpp
@@ -0,0 +1,205 @@
+#include "FileManager.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+#include <system_error>
+
+namespace llvm {
+namespace advisor {
+
+/// Create a uniquely named directory via sys::fs::createUniqueDirectory,
+/// using \p prefix, and return its path; the underlying error code is
+/// wrapped in the returned Error on failure.
+Expected<std::string> FileManager::createTempDir(const std::string &prefix) {
+  SmallString<128> uniqueDir;
+  const std::error_code status =
+      sys::fs::createUniqueDirectory(prefix, uniqueDir);
+  if (status)
+    return createStringError(status,
+                             "Failed to create unique temporary directory");
+  return std::string(uniqueDir.str());
+}
+
+/// Recursively copy the contents of directory \p source into \p dest.
+///
+/// \p dest (and any intermediate directories) is created as needed, including
+/// when \p source contains no entries. Each entry keeps its path relative to
+/// \p source. The first failure aborts the copy and is returned with the
+/// underlying system error code; already-copied entries are left in place.
+///
+/// NOTE(review): the directory iterator is used with its default symlink
+/// policy -- confirm that is the desired behaviour for symlinked directories.
+Error FileManager::copyDirectory(const std::string &source,
+                                 const std::string &dest) {
+  // Prefix stripped from every visited path: `source` without trailing
+  // separators (a lone root separator is kept).
+  StringRef sourcePrefix(source);
+  while (sourcePrefix.size() > 1 &&
+         sys::path::is_separator(sourcePrefix.back())) {
+    sourcePrefix = sourcePrefix.drop_back();
+  }
+
+  std::error_code EC;
+  sys::fs::recursive_directory_iterator I(source, EC), E;
+  if (EC) {
+    return createStringError(EC, "Failed to iterate directory: " + source);
+  }
+
+  // Create the destination root up front so an empty source still produces
+  // an (empty) destination directory.
+  if (!dest.empty()) {
+    if (std::error_code MkdirEC = sys::fs::create_directories(dest)) {
+      return createStringError(MkdirEC, "Failed to create directory: " + dest);
+    }
+  }
+
+  for (; I != E && !EC; I.increment(EC)) {
+    StringRef currentPath = I->path();
+
+    StringRef relativePath = currentPath;
+    if (!relativePath.consume_front(sourcePrefix)) {
+      return createStringError(
+          std::make_error_code(std::errc::invalid_argument),
+          "Path '" + currentPath.str() + "' not in source dir '" + source +
+              "'");
+    }
+    // Drop whatever separators joined the prefix to the entry.
+    while (!relativePath.empty() &&
+           sys::path::is_separator(relativePath.front())) {
+      relativePath = relativePath.drop_front();
+    }
+
+    SmallString<128> destPath(dest);
+    sys::path::append(destPath, relativePath);
+
+    if (sys::fs::is_directory(currentPath)) {
+      if (std::error_code MkdirEC = sys::fs::create_directories(destPath)) {
+        return createStringError(MkdirEC, "Failed to create directory: " +
+                                              destPath.str().str());
+      }
+      continue;
+    }
+
+    StringRef parent = sys::path::parent_path(destPath);
+    if (!parent.empty()) {
+      if (std::error_code MkdirEC = sys::fs::create_directories(parent)) {
+        return createStringError(MkdirEC,
+                                 "Failed to create parent directory for: " +
+                                     destPath.str().str());
+      }
+    }
+    if (std::error_code CopyEC = sys::fs::copy_file(currentPath, destPath)) {
+      return createStringError(CopyEC,
+                               "Failed to copy file: " + currentPath.str());
+    }
+  }
+
+  if (EC) {
+    return createStringError(EC, "Failed to iterate directory: " + source);
+  }
+
+  return Error::success();
+}
+
+/// Remove the directory \p path together with everything beneath it.
+///
+/// A path that does not exist is treated as success. Deletion is delegated to
+/// sys::fs::remove_directories rather than a hand-rolled walk, so entries are
+/// not deleted out from under an active recursive iterator. The first failure
+/// is returned with the underlying system error code.
+Error FileManager::removeDirectory(const std::string &path) {
+  if (!sys::fs::exists(path)) {
+    return Error::success();
+  }
+
+  // IgnoreErrors=false: surface failures instead of silently leaving files.
+  if (std::error_code EC =
+          sys::fs::remove_directories(path, /*IgnoreErrors=*/false)) {
+    return createStringError(EC, "Failed to remove directory: " + path);
+  }
+
+  return Error::success();
+}
+
+/// Recursively collect the non-directory entries under \p directory whose
+/// file name contains \p pattern as a plain substring.
+/// Iteration errors end the walk; whatever was found so far is returned.
+std::vector<std::string> FileManager::findFiles(const std::string &directory,
+                                                const std::string &pattern) {
+  std::vector<std::string> matches;
+  std::error_code walkError;
+  sys::fs::recursive_directory_iterator entry(directory, walkError), end;
+  while (entry != end && !walkError) {
+    const bool isDirectory =
+        entry->type() == sys::fs::file_type::directory_file;
+    if (!isDirectory) {
+      const StringRef leaf = sys::path::filename(entry->path());
+      if (leaf.contains(pattern))
+        matches.push_back(entry->path());
+    }
+    entry.increment(walkError);
+  }
+  return matches;
+}
+
+/// Recursively collect the non-directory entries under \p directory whose
+/// full path ends with any of the suffixes in \p extensions.
+/// Each entry is reported at most once. Iteration errors end the walk and
+/// whatever was found so far is returned.
+std::vector<std::string>
+FileManager::findFilesByExtension(const std::string &directory,
+                                  const std::vector<std::string> &extensions) {
+  std::vector<std::string> matches;
+  std::error_code walkError;
+  sys::fs::recursive_directory_iterator entry(directory, walkError), end;
+  while (entry != end && !walkError) {
+    if (entry->type() != sys::fs::file_type::directory_file) {
+      const StringRef candidate = entry->path();
+      const bool hasWantedSuffix =
+          llvm::any_of(extensions, [&candidate](const std::string &suffix) {
+            return candidate.ends_with(suffix);
+          });
+      if (hasWantedSuffix)
+        matches.push_back(candidate.str());
+    }
+    entry.increment(walkError);
+  }
+  return matches;
+}
+
+/// Move the file \p source to \p dest.
+///
+/// Moving a path onto itself is a no-op. The parent directory of \p dest is
+/// created when \p dest has one. A rename is attempted first; if it fails the
+/// file is copied and the source removed. Failures carry the underlying
+/// system error code.
+Error FileManager::moveFile(const std::string &source,
+                            const std::string &dest) {
+  if (source == dest) {
+    return Error::success();
+  }
+
+  // A bare file name has no parent component; there is nothing to create,
+  // and asking for the empty path to be created would fail.
+  StringRef parent = sys::path::parent_path(dest);
+  if (!parent.empty()) {
+    if (std::error_code EC = sys::fs::create_directories(parent)) {
+      return createStringError(
+          EC, "Failed to create parent directory for destination: " + dest);
+    }
+  }
+
+  if (!sys::fs::rename(source, dest)) {
+    return Error::success();
+  }
+
+  // If rename fails, try copy and remove
+  if (std::error_code EC = sys::fs::copy_file(source, dest)) {
+    return createStringError(EC,
+                             "Failed to move file (copy failed): " + source);
+  }
+  if (std::error_code EC = sys::fs::remove(source)) {
+    return createStringError(
+        EC, "Failed to move file (source removal failed): " + source);
+  }
+
+  return Error::success();
+}
+
+/// Copy the file \p source to \p dest.
+///
+/// Copying a path onto itself is a no-op. The parent directory of \p dest is
+/// created when \p dest has one. Failures carry the underlying system error
+/// code.
+Error FileManager::copyFile(const std::string &source,
+                            const std::string &dest) {
+  if (source == dest) {
+    return Error::success();
+  }
+
+  // A bare file name has no parent component; there is nothing to create,
+  // and asking for the empty path to be created would fail.
+  StringRef parent = sys::path::parent_path(dest);
+  if (!parent.empty()) {
+    if (std::error_code EC = sys::fs::create_directories(parent)) {
+      return createStringError(
+          EC, "Failed to create parent directory for destination: " + dest);
+    }
+  }
+
+  if (std::error_code EC = sys::fs::copy_file(source, dest)) {
+    return createStringError(EC, "Failed to copy file: " + source);
+  }
+
+  return Error::success();
+}
+
+Expected<size_t> FileManager::getFileSize(const std::string &path) {
+ sys::fs::file_status status;
+ if (auto EC = sys::fs::status(path, status)) {
+ return createStringError(EC, "File not found: " + path);
+ }
+
+ return status.getSize();
+}
+
+} // namespace advisor
+} // namespace llvm
\ No newline at end of file
diff --git a/llvm/tools/llvm-advisor/src/Utils/FileManager.h b/llvm/tools/llvm-advisor/src/Utils/FileManager.h
new file mode 100644
index 0000000000000..07b49e647f542
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Utils/FileManager.h
@@ -0,0 +1,46 @@
+#ifndef LLVM_ADVISOR_FILE_MANAGER_H
+#define LLVM_ADVISOR_FILE_MANAGER_H
+
+#include "llvm/Support/Error.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+
+/// Stateless collection of file-system helpers; every member is static.
+class FileManager {
+public:
+ /// Create a uniquely named directory derived from \p prefix and return its
+ /// path.
+ static Expected<std::string>
+ createTempDir(const std::string &prefix = "llvm-advisor");
+
+ /// Recursively copy the contents of \p source into \p dest, keeping each
+ /// entry's path relative to \p source.
+ static Error copyDirectory(const std::string &source,
+ const std::string &dest);
+
+ /// Remove directory \p path and its contents; a missing path is success.
+ static Error removeDirectory(const std::string &path);
+
+ /// Recursively find non-directory entries whose file name contains
+ /// \p pattern as a plain substring (not a glob or regular expression).
+ static std::vector<std::string> findFiles(const std::string &directory,
+ const std::string &pattern);
+
+ /// Recursively find non-directory entries whose path ends with any of the
+ /// given suffixes.
+ static std::vector<std::string>
+ findFilesByExtension(const std::string &directory,
+ const std::vector<std::string> &extensions);
+
+ /// Move file from source to destination; falls back to copy + remove when
+ /// the rename fails.
+ static Error moveFile(const std::string &source, const std::string &dest);
+
+ /// Copy file from source to destination
+ static Error copyFile(const std::string &source, const std::string &dest);
+
+ /// Get file size as reported by the file's status.
+ static Expected<size_t> getFileSize(const std::string &path);
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-advisor/src/Utils/ProcessRunner.cpp b/llvm/tools/llvm-advisor/src/Utils/ProcessRunner.cpp
new file mode 100644
index 0000000000000..b08b3cc88a434
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Utils/ProcessRunner.cpp
@@ -0,0 +1,69 @@
+#include "ProcessRunner.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+
+namespace llvm {
+namespace advisor {
+
+/// Run \p program with \p args, capturing its stdout and stderr.
+///
+/// \p program is looked up with sys::findProgramByName. Output is captured by
+/// redirecting both streams to temporary files, which are removed before
+/// returning. stdin is not redirected.
+///
+/// \param timeoutSeconds forwarded to sys::ExecuteAndWait as the wait limit.
+/// \returns the exit code and captured output. An Error is returned when the
+/// program cannot be found, the temporary capture files cannot be created, or
+/// the process could not be launched at all.
+Expected<ProcessRunner::ProcessResult>
+ProcessRunner::run(const std::string &program,
+                   const std::vector<std::string> &args, int timeoutSeconds) {
+
+  auto programPath = sys::findProgramByName(program);
+  if (!programPath) {
+    return createStringError(programPath.getError(),
+                             "Tool not found: " + program);
+  }
+
+  // argv[0] is the program name as given, followed by the caller's arguments.
+  std::vector<StringRef> execArgs;
+  execArgs.reserve(args.size() + 1);
+  execArgs.push_back(program);
+  for (const auto &arg : args) {
+    execArgs.push_back(arg);
+  }
+
+  SmallString<128> stdoutPath, stderrPath;
+  if (std::error_code EC =
+          sys::fs::createTemporaryFile("stdout", "tmp", stdoutPath)) {
+    return createStringError(EC, "Failed to create temporary file for stdout");
+  }
+  if (std::error_code EC =
+          sys::fs::createTemporaryFile("stderr", "tmp", stderrPath)) {
+    (void)sys::fs::remove(stdoutPath); // do not leak the first capture file
+    return createStringError(EC, "Failed to create temporary file for stderr");
+  }
+
+  std::optional<StringRef> redirects[] = {
+      std::nullopt,          // stdin
+      StringRef(stdoutPath), // stdout
+      StringRef(stderrPath)  // stderr
+  };
+
+  std::string errMsg;
+  bool executionFailed = false;
+  int exitCode = sys::ExecuteAndWait(*programPath, execArgs, std::nullopt,
+                                     redirects, timeoutSeconds,
+                                     /*MemoryLimit=*/0, &errMsg,
+                                     &executionFailed);
+
+  ProcessResult result;
+  result.exitCode = exitCode;
+  // TODO: Collect information about compilation time
+  result.executionTime = 0; // not tracking time
+
+  auto stdoutBuffer = MemoryBuffer::getFile(stdoutPath);
+  if (stdoutBuffer) {
+    result.stdout = (*stdoutBuffer)->getBuffer().str();
+  }
+
+  auto stderrBuffer = MemoryBuffer::getFile(stderrPath);
+  if (stderrBuffer) {
+    result.stderr = (*stderrBuffer)->getBuffer().str();
+  }
+
+  // Best-effort cleanup of the capture files.
+  (void)sys::fs::remove(stdoutPath);
+  (void)sys::fs::remove(stderrPath);
+
+  // A process that never started has no meaningful exit code; report it as an
+  // error instead of handing back a result that looks like a tool failure.
+  if (executionFailed) {
+    std::string message = "Failed to execute: " + program;
+    if (!errMsg.empty()) {
+      message += ": " + errMsg;
+    }
+    return createStringError(std::make_error_code(std::errc::io_error),
+                             message);
+  }
+
+  return result;
+}
+
+/// Run \p program with \p args; currently identical to run().
+/// NOTE(review): \p env is accepted but not used -- the child process is
+/// launched exactly as run() would launch it.
+Expected<ProcessRunner::ProcessResult> ProcessRunner::runWithEnv(
+ const std::string &program, const std::vector<std::string> &args,
+ const std::vector<std::string> &env, int timeoutSeconds) {
+
+ // For simplicity, just use the regular run method
+ // Environment variables can be added later if needed
+ return run(program, args, timeoutSeconds);
+}
+
+} // namespace advisor
+} // namespace llvm
diff --git a/llvm/tools/llvm-advisor/src/Utils/ProcessRunner.h b/llvm/tools/llvm-advisor/src/Utils/ProcessRunner.h
new file mode 100644
index 0000000000000..ffd0ef353ba16
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Utils/ProcessRunner.h
@@ -0,0 +1,32 @@
+#ifndef LLVM_ADVISOR_PROCESS_RUNNER_H
+#define LLVM_ADVISOR_PROCESS_RUNNER_H
+
+#include "llvm/Support/Error.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+
+/// Helpers for running external programs and capturing their output.
+class ProcessRunner {
+public:
+ /// Outcome of one process invocation.
+ // NOTE(review): the member names `stdout` / `stderr` may collide with the
+ // <cstdio> macros of the same name on some platforms -- confirm portability.
+ struct ProcessResult {
+ int exitCode; // value returned by sys::ExecuteAndWait
+ std::string stdout; // captured standard output
+ std::string stderr; // captured standard error
+ double executionTime; // currently always set to 0 by run()
+ };
+
+ /// Run \p program with \p args and capture its output.
+ /// \p timeoutSeconds is forwarded to sys::ExecuteAndWait.
+ static Expected<ProcessResult> run(const std::string &program,
+ const std::vector<std::string> &args,
+ int timeoutSeconds = 60);
+
+ /// Same as run(); \p env is currently ignored by the implementation.
+ static Expected<ProcessResult>
+ runWithEnv(const std::string &program, const std::vector<std::string> &args,
+ const std::vector<std::string> &env, int timeoutSeconds = 60);
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
>From cc3e1ed4848ff6b7753cf731779602e4e7593309 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Sun, 13 Jul 2025 06:09:39 +0200
Subject: [PATCH 3/9] [llvm-advisor] Add basic build/compilation data models
Introduce data structures that represent a single build phase
and compilation unit.
---
.../llvm-advisor/src/Core/BuildContext.h | 52 +++++++++++++++
.../llvm-advisor/src/Core/CompilationUnit.cpp | 66 +++++++++++++++++++
.../llvm-advisor/src/Core/CompilationUnit.h | 58 ++++++++++++++++
3 files changed, 176 insertions(+)
create mode 100644 llvm/tools/llvm-advisor/src/Core/BuildContext.h
create mode 100644 llvm/tools/llvm-advisor/src/Core/CompilationUnit.cpp
create mode 100644 llvm/tools/llvm-advisor/src/Core/CompilationUnit.h
diff --git a/llvm/tools/llvm-advisor/src/Core/BuildContext.h b/llvm/tools/llvm-advisor/src/Core/BuildContext.h
new file mode 100644
index 0000000000000..4f40c37ca8706
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/BuildContext.h
@@ -0,0 +1,52 @@
+#ifndef LLVM_ADVISOR_BUILD_CONTEXT_H
+#define LLVM_ADVISOR_BUILD_CONTEXT_H
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+
+/// Stage of a build that a command invocation corresponds to.
+enum class BuildPhase {
+  Unknown,
+  Preprocessing,
+  Compilation,
+  Assembly,
+  Linking,
+  Archiving,
+  CMakeConfigure,
+  CMakeBuild,
+  MakefileBuild
+};
+
+/// Kind of program a command invocation runs.
+enum class BuildTool {
+  Unknown,
+  Clang,
+  GCC,
+  LLVM_Tools,
+  CMake,
+  Make,
+  Ninja,
+  Linker,
+  Archiver
+};
+
+/// Description of a single build command: which tool and phase it is, where
+/// it runs, and which files and features are involved.
+struct BuildContext {
+  // Default to Unknown so a default-constructed context never holds an
+  // indeterminate enum value, matching the defaulted flags below.
+  BuildPhase phase = BuildPhase::Unknown;
+  BuildTool tool = BuildTool::Unknown;
+  std::string workingDirectory;
+  std::string outputDirectory;
+  std::vector<std::string> inputFiles;
+  std::vector<std::string> outputFiles;
+  std::vector<std::string> expectedGeneratedFiles;
+  std::map<std::string, std::string> metadata;
+  bool hasOffloading = false;
+  bool hasDebugInfo = false;
+  bool hasOptimization = false;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-advisor/src/Core/CompilationUnit.cpp b/llvm/tools/llvm-advisor/src/Core/CompilationUnit.cpp
new file mode 100644
index 0000000000000..8b6a478cfaf63
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/CompilationUnit.cpp
@@ -0,0 +1,66 @@
+#include "CompilationUnit.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+namespace advisor {
+
+/// Store \p info and \p workDir and create the unit's data directory
+/// (<workDir>/units/<name>). A failure to create the directory is ignored.
+CompilationUnit::CompilationUnit(const CompilationUnitInfo &info,
+                                 const std::string &workDir)
+    : info_(info), workDir_(workDir) {
+  // info_ and workDir_ are already initialised here, so getDataDir() yields
+  // the same path the members describe.
+  sys::fs::create_directories(getDataDir());
+}
+
+/// Return the path of the first source file, or an empty string when the
+/// unit has no sources.
+std::string CompilationUnit::getPrimarySource() const {
+  return info_.sources.empty() ? std::string()
+                               : info_.sources.front().path;
+}
+
+/// Return the unit's data directory: <workDir>/units/<name>.
+std::string CompilationUnit::getDataDir() const {
+  SmallString<128> unitDir;
+  sys::path::append(unitDir, workDir_, "units", info_.name);
+  return std::string(unitDir.str());
+}
+
+/// Return the executable path recorded in the unit's info (outputExecutable).
+std::string CompilationUnit::getExecutablePath() const {
+ return info_.outputExecutable;
+}
+
+/// Record \p path as a generated file of kind \p type. A list for \p type is
+/// created on first use; duplicates are not filtered.
+void CompilationUnit::addGeneratedFile(const std::string &type,
+                                       const std::string &path) {
+  std::vector<std::string> &bucket = generatedFiles_[type];
+  bucket.push_back(path);
+}
+
+/// Return whether any generated file is recorded for \p type. An empty
+/// \p type asks whether any kind at all has been recorded.
+bool CompilationUnit::hasGeneratedFiles(const std::string &type) const {
+  if (type.empty())
+    return !generatedFiles_.empty();
+
+  const auto entry = generatedFiles_.find(type);
+  if (entry == generatedFiles_.end())
+    return false;
+  return !entry->second.empty();
+}
+
+/// Return the generated files recorded for \p type, or an empty list when
+/// the kind is unknown. An empty \p type returns the files of every kind,
+/// concatenated in the map's iteration order.
+std::vector<std::string>
+CompilationUnit::getGeneratedFiles(const std::string &type) const {
+  if (!type.empty()) {
+    const auto entry = generatedFiles_.find(type);
+    if (entry == generatedFiles_.end())
+      return std::vector<std::string>();
+    return entry->second;
+  }
+
+  std::vector<std::string> merged;
+  for (const auto &bucket : generatedFiles_) {
+    for (const std::string &file : bucket.second)
+      merged.push_back(file);
+  }
+  return merged;
+}
+
+/// Return a reference to the internal map from file kind to recorded paths.
+const std::unordered_map<std::string, std::vector<std::string>> &
+CompilationUnit::getAllGeneratedFiles() const {
+ return generatedFiles_;
+}
+
+} // namespace advisor
+} // namespace llvm
\ No newline at end of file
diff --git a/llvm/tools/llvm-advisor/src/Core/CompilationUnit.h b/llvm/tools/llvm-advisor/src/Core/CompilationUnit.h
new file mode 100644
index 0000000000000..18dbc35ab5aec
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/CompilationUnit.h
@@ -0,0 +1,58 @@
+#ifndef LLVM_ADVISOR_COMPILATION_UNIT_H
+#define LLVM_ADVISOR_COMPILATION_UNIT_H
+
+#include "llvm/Support/Error.h"
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+
+/// One source file that belongs to a compilation unit.
+struct SourceFile {
+ std::string path;
+ std::string language;
+ bool isHeader = false;
+ std::vector<std::string> dependencies;
+};
+
+/// Static description of a compilation unit.
+struct CompilationUnitInfo {
+ std::string name; // also used as the unit's directory name under units/
+ std::vector<SourceFile> sources;
+ std::vector<std::string> compileFlags;
+ std::string targetArch;
+ bool hasOffloading = false;
+ std::string outputObject;
+ std::string outputExecutable;
+};
+
+/// A compilation unit plus the files generated for it, grouped by kind.
+class CompilationUnit {
+public:
+ /// Stores copies of \p info and \p workDir and creates the data directory.
+ CompilationUnit(const CompilationUnitInfo &info, const std::string &workDir);
+
+ const std::string &getName() const { return info_.name; }
+ const CompilationUnitInfo &getInfo() const { return info_; }
+ const std::string &getWorkDir() const { return workDir_; }
+ /// Path of the first source, or "" when there are no sources.
+ std::string getPrimarySource() const;
+
+ /// <workDir>/units/<name>
+ std::string getDataDir() const;
+ std::string getExecutablePath() const;
+
+ /// Append \p path to the list of generated files of kind \p type.
+ void addGeneratedFile(const std::string &type, const std::string &path);
+
+ /// An empty \p type means "any kind" for both queries below.
+ bool hasGeneratedFiles(const std::string &type) const;
+ std::vector<std::string>
+ getGeneratedFiles(const std::string &type = "") const;
+ const std::unordered_map<std::string, std::vector<std::string>> &
+ getAllGeneratedFiles() const;
+
+private:
+ CompilationUnitInfo info_;
+ std::string workDir_;
+ // file kind -> paths recorded through addGeneratedFile()
+ std::unordered_map<std::string, std::vector<std::string>> generatedFiles_;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
\ No newline at end of file
>From 336db515d67b787c3e63d33e77f2c70787f1a578 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Sun, 13 Jul 2025 06:18:52 +0200
Subject: [PATCH 4/9] [llvm-advisor] Add command analyzer helper
CommandAnalyzer inspects an incoming compiler or build-system
invocation and classifies the tool in use, the build phase,
input/output files and notable flags.
---
.../llvm-advisor/src/Core/CommandAnalyzer.cpp | 167 ++++++++++++++++++
.../llvm-advisor/src/Core/CommandAnalyzer.h | 32 ++++
2 files changed, 199 insertions(+)
create mode 100644 llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.cpp
create mode 100644 llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.h
diff --git a/llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.cpp b/llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.cpp
new file mode 100644
index 0000000000000..3192c42669e65
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.cpp
@@ -0,0 +1,167 @@
+#include "CommandAnalyzer.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+namespace advisor {
+// Stores copies of the command name and its arguments for later analysis.
+CommandAnalyzer::CommandAnalyzer(const std::string &command,
+ const std::vector<std::string> &args)
+ : command_(command), args_(args) {}
+
+BuildContext CommandAnalyzer::analyze() const {
+  // The current directory of this process becomes the working directory.
+  SmallString<256> currentDir;
+  sys::fs::current_path(currentDir);
+
+  BuildContext result;
+  result.workingDirectory = currentDir.str().str();
+  result.tool = detectBuildTool();
+  result.phase = detectBuildPhase(result.tool); // phase depends on the tool
+  result.inputFiles = extractInputFiles();
+  result.outputFiles = extractOutputFiles();
+  detectBuildFeatures(result); // sets feature flags and metadata in place
+  return result;
+}
+// Classifies the tool by the file name of command_; the first match wins.
+BuildTool CommandAnalyzer::detectBuildTool() const {
+ return StringSwitch<BuildTool>(sys::path::filename(command_))
+ .StartsWith("clang", BuildTool::Clang) // prefix: also matches e.g. clang++
+ .StartsWith("gcc", BuildTool::GCC)
+ .StartsWith("g++", BuildTool::GCC)
+ .Case("cmake", BuildTool::CMake)
+ .Case("make", BuildTool::Make)
+ .Case("ninja", BuildTool::Ninja)
+ .EndsWith("-ld", BuildTool::Linker) // suffix: e.g. x86_64-linux-gnu-ld
+ .Case("ld", BuildTool::Linker)
+ .Case("ar", BuildTool::Archiver)
+ .Case("llvm-ar", BuildTool::Archiver) // must precede the "llvm-" prefix rule
+ .StartsWith("llvm-", BuildTool::LLVM_Tools)
+ .Default(BuildTool::Unknown);
+}
+
+BuildPhase CommandAnalyzer::detectBuildPhase(BuildTool tool) const {
+  // Maps the detected tool plus the arguments onto a build phase. Build
+  // systems, linkers and archivers are decided by the tool alone; compiler
+  // drivers are decided by stage options first and file extensions second.
+  // Every other tool yields BuildPhase::Unknown.
+  switch (tool) {
+  case BuildTool::CMake:
+    // "cmake --build" runs a build; every other invocation configures.
+    for (const std::string &arg : args_)
+      if (arg == "--build")
+        return BuildPhase::CMakeBuild;
+    return BuildPhase::CMakeConfigure;
+  case BuildTool::Make:
+  case BuildTool::Ninja:
+    return BuildPhase::MakefileBuild;
+  case BuildTool::Linker:
+    return BuildPhase::Linking;
+  case BuildTool::Archiver:
+    return BuildPhase::Archiving;
+  case BuildTool::Clang:
+  case BuildTool::GCC:
+    break; // classified from the arguments below
+  default:
+    return BuildPhase::Unknown;
+  }
+
+  // An explicit stage option decides; the first one on the command line wins.
+  for (const std::string &arg : args_) {
+    if (arg == "-E")
+      return BuildPhase::Preprocessing;
+    if (arg == "-S")
+      return BuildPhase::Assembly;
+    if (arg == "-c")
+      return BuildPhase::Compilation;
+  }
+
+  // Without one, guess from file extensions. Each extension is accepted in
+  // all-lowercase and all-uppercase spelling only.
+  const auto isObjectFile = [](StringRef name) {
+    return name.ends_with(".o") || name.ends_with(".O") ||
+           name.ends_with(".obj") || name.ends_with(".OBJ");
+  };
+  const auto isSourceFile = [](StringRef name) {
+    return name.ends_with(".c") || name.ends_with(".C") ||
+           name.ends_with(".cpp") || name.ends_with(".CPP") ||
+           name.ends_with(".cc") || name.ends_with(".CC") ||
+           name.ends_with(".cxx") || name.ends_with(".CXX");
+  };
+
+  // Any object file among the arguments means the driver is linking ...
+  for (const std::string &arg : args_)
+    if (isObjectFile(arg))
+      return BuildPhase::Linking;
+
+  // ... otherwise any C/C++ source file means it is compiling.
+  for (const std::string &arg : args_)
+    if (isSourceFile(arg))
+      return BuildPhase::Compilation; // Default for source files
+
+  // Compiler driver with neither stage option nor recognizable files.
+  return BuildPhase::Unknown;
+}
+
+void CommandAnalyzer::detectBuildFeatures(BuildContext &context) const {
+  // Sets the debug/optimization/offloading flags on `context` and records
+  // the -march=, -mtune= and --offload-arch= values in context.metadata.
+  for (const std::string &rawArg : args_) {
+    const StringRef arg(rawArg);
+    // "-g0" turns debug info off, so it must not count as a "-g..." option.
+    if (arg.starts_with("-g") && arg != "-g0")
+      context.hasDebugInfo = true;
+
+    // A bare "-O" enables optimization (like -O1); only "-O0" disables it.
+    if (arg.starts_with("-O") && arg != "-O0")
+      context.hasOptimization = true;
+
+    // Loose substring match, e.g. -fopenmp, "cuda" after -x, --offload-arch=.
+    if (arg.contains("openmp") || arg.contains("openacc") ||
+        arg.contains("cuda") || arg.contains("offload"))
+      context.hasOffloading = true;
+
+    // consume_front strips the prefix and leaves exactly the option value.
+    StringRef value = arg;
+    if (value.consume_front("-march="))
+      context.metadata["target_arch"] = value.str();
+    else if (value.consume_front("-mtune="))
+      context.metadata["tune"] = value.str();
+    else if (value.consume_front("--offload-arch="))
+      context.metadata["offload_arch"] = value.str();
+  }
+}
+// Returns the positional (non-option) arguments that name regular files.
+std::vector<std::string> CommandAnalyzer::extractInputFiles() const {
+  std::vector<std::string> inputs;
+  for (size_t i = 0; i < args_.size(); ++i) {
+    const StringRef arg(args_[i]);
+    if (arg.starts_with("-")) {
+      // These options take their value as the next argument; skip it too.
+      if (arg == "-o" || arg == "-I" || arg == "-L" || arg == "-D")
+        ++i;
+      continue;
+    }
+    // exists() is also true for directories (e.g. "dir" in "-isystem dir").
+    if (sys::fs::is_regular_file(arg))
+      inputs.push_back(args_[i]);
+  }
+  return inputs;
+}
+
+std::vector<std::string> CommandAnalyzer::extractOutputFiles() const {
+  // Collects the argument after each "-o"; a trailing "-o" adds nothing.
+  std::vector<std::string> outputs;
+  const size_t count = args_.size();
+  for (size_t i = 0; i + 1 < count; ++i) {
+    if (args_[i] != "-o")
+      continue;
+    outputs.push_back(args_[++i]); // also steps over the consumed value
+  }
+  return outputs;
+}
+
+} // namespace advisor
+} // namespace llvm
diff --git a/llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.h b/llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.h
new file mode 100644
index 0000000000000..c3efdff147e5f
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/CommandAnalyzer.h
@@ -0,0 +1,32 @@
+#ifndef LLVM_ADVISOR_COMMAND_ANALYZER_H
+#define LLVM_ADVISOR_COMMAND_ANALYZER_H
+
+#include "BuildContext.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+// Classifies one tool invocation (command + arguments) into a BuildContext.
+class CommandAnalyzer {
+public:
+ CommandAnalyzer(const std::string &command,
+ const std::vector<std::string> &args);
+ // Runs all detection helpers below and returns the filled-in context.
+ BuildContext analyze() const;
+ // Helpers used by analyze().
+private:
+ BuildTool detectBuildTool() const;
+ BuildPhase detectBuildPhase(BuildTool tool) const;
+ void detectBuildFeatures(BuildContext &context) const;
+ std::vector<std::string> extractInputFiles() const;
+ std::vector<std::string> extractOutputFiles() const;
+ // Copies of the constructor arguments.
+ std::string command_;
+ std::vector<std::string> args_;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
>From ebdb0b26b8ef67ce3e71c02a8e90232d9018a567 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Sun, 13 Jul 2025 06:24:53 +0200
Subject: [PATCH 5/9] [llvm-advisor] Add support for builds with extra compiler
data
This change adds logic to run compiler processes and automatically
add options to collect optimization remarks, profiling data, and
debug information when needed.
---
.../llvm-advisor/src/Core/BuildExecutor.cpp | 109 ++++++++++++++++++
.../llvm-advisor/src/Core/BuildExecutor.h | 34 ++++++
2 files changed, 143 insertions(+)
create mode 100644 llvm/tools/llvm-advisor/src/Core/BuildExecutor.cpp
create mode 100644 llvm/tools/llvm-advisor/src/Core/BuildExecutor.h
diff --git a/llvm/tools/llvm-advisor/src/Core/BuildExecutor.cpp b/llvm/tools/llvm-advisor/src/Core/BuildExecutor.cpp
new file mode 100644
index 0000000000000..a4af5a660c80e
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/BuildExecutor.cpp
@@ -0,0 +1,109 @@
+#include "BuildExecutor.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace advisor {
+// Keeps a reference to config; the configuration must outlive the executor.
+BuildExecutor::BuildExecutor(const AdvisorConfig &config) : config_(config) {}
+
+Expected<int> BuildExecutor::execute(const std::string &compiler,
+                                     const std::vector<std::string> &args,
+                                     BuildContext &buildContext,
+                                     const std::string &tempDir) {
+  // Runs `compiler` with instrumented arguments and waits for it to finish.
+  // Yields the value of sys::ExecuteAndWait, or an error when the program
+  // cannot be found by name.
+  const std::vector<std::string> finalArgs =
+      instrumentCompilerArgs(args, buildContext, tempDir);
+
+  auto resolvedPath = sys::findProgramByName(compiler);
+  if (!resolvedPath)
+    return createStringError(
+        std::make_error_code(std::errc::no_such_file_or_directory),
+        "Compiler not found: " + compiler);
+
+  // argv[0] is the name as passed in; the resolved path is what is executed.
+  std::vector<StringRef> argv(1, StringRef(compiler));
+  argv.insert(argv.end(), finalArgs.begin(), finalArgs.end());
+
+  if (config_.getVerbose()) {
+    outs() << "Executing: " << compiler;
+    for (const std::string &arg : finalArgs)
+      outs() << " " << arg;
+    outs() << "\n";
+  }
+
+  return sys::ExecuteAndWait(*resolvedPath, argv);
+}
+
+std::vector<std::string>
+BuildExecutor::instrumentCompilerArgs(const std::vector<std::string> &args,
+                                      BuildContext &buildContext,
+                                      const std::string &tempDir) {
+  // Returns a copy of `args` extended so that the compiler also emits debug
+  // info, optimization remarks and, if the configuration enables the profiler,
+  // an instrumentation profile. Remark and profile files are directed into
+  // `tempDir` and their paths appended to buildContext.expectedGeneratedFiles.
+  const std::string recordFileFlag = "-foptimization-record-file=";
+  const std::string defaultRemarks = tempDir + "/remarks.opt.yaml";
+
+  std::vector<std::string> result = args;
+
+  // Scan existing flags to avoid duplication
+  bool hasDebug = false, hasRemarks = false, hasProfile = false;
+  for (const std::string &arg : args) {
+    if (arg.find("-g") == 0)
+      hasDebug = true;
+    if (arg.find("-fsave-optimization-record") != std::string::npos)
+      hasRemarks = true;
+    if (arg.find("-fprofile-instr-generate") != std::string::npos)
+      hasProfile = true;
+  }
+
+  // Add debug info if not present
+  if (!hasDebug)
+    result.push_back("-g");
+
+  // Remarks: request them if the user did not. If the user did, keep the
+  // chosen file name but move the file into tempDir.
+  bool recordFileSet = false;
+  if (!hasRemarks) {
+    result.push_back("-fsave-optimization-record");
+  } else {
+    for (std::string &arg : result) {
+      StringRef userPath(arg);
+      // consume_front drops the whole 27-character flag including '='; the
+      // earlier substr(26) kept the '=' in front of bare file names.
+      if (!userPath.consume_front(recordFileFlag))
+        continue;
+      // Build the new path first: userPath still points into `arg`.
+      const std::string redirected =
+          tempDir + "/" + sys::path::filename(userPath).str();
+      arg = recordFileFlag + redirected;
+      buildContext.expectedGeneratedFiles.push_back(redirected);
+      recordFileSet = true;
+      break;
+    }
+  }
+  // No user-provided record file was redirected: write to the default one.
+  if (!recordFileSet) {
+    result.push_back(recordFileFlag + defaultRemarks);
+    buildContext.expectedGeneratedFiles.push_back(defaultRemarks);
+  }
+
+  // Add profiling if enabled and not present, redirect to temp directory
+  if (config_.getRunProfiler() && !hasProfile) {
+    result.push_back("-fprofile-instr-generate=" + tempDir +
+                     "/profile.profraw");
+    result.push_back("-fcoverage-mapping");
+    buildContext.expectedGeneratedFiles.push_back(tempDir + "/profile.profraw");
+  }
+
+  return result;
+}
+
+} // namespace advisor
+} // namespace llvm
diff --git a/llvm/tools/llvm-advisor/src/Core/BuildExecutor.h b/llvm/tools/llvm-advisor/src/Core/BuildExecutor.h
new file mode 100644
index 0000000000000..a77ffd70c9b57
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/BuildExecutor.h
@@ -0,0 +1,34 @@
+#ifndef LLVM_ADVISOR_BUILD_EXECUTOR_H
+#define LLVM_ADVISOR_BUILD_EXECUTOR_H
+
+#include "../Config/AdvisorConfig.h"
+#include "BuildContext.h"
+#include "llvm/Support/Error.h"
+#include <set>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+// Launches the build command after adding data-collection compiler flags.
+class BuildExecutor {
+public:
+ BuildExecutor(const AdvisorConfig &config);
+ // Runs compiler with instrumented args; an Error means it was not found.
+ Expected<int> execute(const std::string &compiler,
+ const std::vector<std::string> &args,
+ BuildContext &buildContext, const std::string &tempDir);
+ // Helper for execute(): returns args plus debug/remark/profile flags.
+private:
+ std::vector<std::string>
+ instrumentCompilerArgs(const std::vector<std::string> &args,
+ BuildContext &buildContext,
+ const std::string &tempDir);
+ // Not owned: the referenced configuration must outlive this object.
+ const AdvisorConfig &config_;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
>From 8b2f6e9d63705e70332b048db86bae27acd73db4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Sun, 13 Jul 2025 06:34:37 +0200
Subject: [PATCH 6/9] [llvm-advisor] Add build coordinator support
This change adds logic to manage builds end to end.
It runs the build process, calls the detector, extracts data,
and moves generated files to the output directory.
---
.../src/Core/CompilationManager.cpp | 257 ++++++++++++++++++
.../src/Core/CompilationManager.h | 45 +++
2 files changed, 302 insertions(+)
create mode 100644 llvm/tools/llvm-advisor/src/Core/CompilationManager.cpp
create mode 100644 llvm/tools/llvm-advisor/src/Core/CompilationManager.h
diff --git a/llvm/tools/llvm-advisor/src/Core/CompilationManager.cpp b/llvm/tools/llvm-advisor/src/Core/CompilationManager.cpp
new file mode 100644
index 0000000000000..e07db9d365009
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/CompilationManager.cpp
@@ -0,0 +1,257 @@
+#include "CompilationManager.h"
+#include "../Detection/UnitDetector.h"
+#include "../Utils/FileManager.h"
+#include "CommandAnalyzer.h"
+#include "DataExtractor.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include <chrono>
+#include <cstdlib>
+#include <set>
+
+namespace llvm {
+namespace advisor {
+
+CompilationManager::CompilationManager(const AdvisorConfig &config)
+    : config_(config), buildExecutor_(config) {
+  // Relative output paths are later resolved against the start directory.
+  SmallString<256> currentDir;
+  sys::fs::current_path(currentDir);
+  initialWorkingDir_ = currentDir.str().str();
+
+  // All intermediate artifacts go into a scratch directory.
+  SmallString<128> tempDirPath;
+  if (sys::fs::createUniqueDirectory("llvm-advisor", tempDirPath)) {
+    // Fallback: a timestamped directory under the platform's temp directory
+    // instead of a hard-coded "/tmp", which is not portable.
+    const auto seconds =
+        std::chrono::duration_cast<std::chrono::seconds>(
+            std::chrono::system_clock::now().time_since_epoch())
+            .count();
+    tempDirPath.clear();
+    sys::path::system_temp_directory(/*ErasedOnReboot=*/true, tempDirPath);
+    sys::path::append(tempDirPath, "llvm-advisor-" + std::to_string(seconds));
+  }
+  tempDir_ = tempDirPath.str().str();
+
+  // A no-op when the directory exists. A failure is reported because every
+  // later step writes below tempDir_.
+  if (std::error_code EC = sys::fs::create_directories(tempDir_))
+    errs() << "llvm-advisor: cannot create temporary directory " << tempDir_
+           << ": " << EC.message() << "\n";
+
+  if (config_.getVerbose())
+    outs() << "Using temporary directory: " << tempDir_ << "\n";
+}
+// Removes the scratch directory unless the configuration keeps temporaries.
+CompilationManager::~CompilationManager() {
+  if (config_.getKeepTemps() || !sys::fs::exists(tempDir_))
+    return;
+  sys::fs::remove_directories(tempDir_);
+}
+
+Expected<int> CompilationManager::executeWithDataCollection(
+    const std::string &compiler, const std::vector<std::string> &args) {
+  // Runs `compiler args...` with instrumentation and collects the artifacts.
+  // Returns what BuildExecutor::execute yields (the exit code), or an Error
+  // if unit detection or launching the compiler fails. Problems in the
+  // collection steps after the build never change the returned exit code;
+  // they are only logged in verbose mode.
+
+  // Analyze the build command
+  BuildContext buildContext = CommandAnalyzer(compiler, args).analyze();
+
+  if (config_.getVerbose())
+    outs() << "Build phase: " << static_cast<int>(buildContext.phase) << "\n";
+
+  // Skip data collection for linking/archiving phases
+  if (buildContext.phase == BuildPhase::Linking ||
+      buildContext.phase == BuildPhase::Archiving)
+    return buildExecutor_.execute(compiler, args, buildContext, tempDir_);
+
+  // Detect compilation units
+  UnitDetector detector(config_);
+  auto detectedUnits = detector.detectUnits(compiler, args);
+  if (!detectedUnits)
+    return detectedUnits.takeError();
+
+  std::vector<std::unique_ptr<CompilationUnit>> units;
+  for (auto &unitInfo : *detectedUnits)
+    units.push_back(std::make_unique<CompilationUnit>(unitInfo, tempDir_));
+
+  // Scan existing files before compilation
+  auto existingFiles = scanDirectory(initialWorkingDir_);
+
+  // Execute compilation with instrumentation
+  auto execResult =
+      buildExecutor_.execute(compiler, args, buildContext, tempDir_);
+  if (!execResult)
+    return execResult;
+  int exitCode = *execResult;
+
+  // Collect generated files (even if compilation failed for analysis)
+  collectGeneratedFiles(existingFiles, units);
+
+  // An llvm::Error must be consumed on every path, not only when verbose:
+  // destroying an unhandled Error aborts in builds with error checking on.
+  // toString() consumes it, so it is called unconditionally.
+  const auto reportFailure = [this](const char *what, Error Err) {
+    const std::string message = toString(std::move(Err));
+    if (config_.getVerbose())
+      errs() << what << ": " << message << "\n";
+  };
+
+  // Extract additional data
+  DataExtractor extractor(config_);
+  for (auto &unit : units)
+    if (Error Err = extractor.extractAllData(*unit, tempDir_))
+      reportFailure("Data extraction failed", std::move(Err));
+
+  // Organize output
+  if (Error Err = organizeOutput(units))
+    reportFailure("Output organization failed", std::move(Err));
+
+  // Clean up leaked files from source directory
+  cleanupLeakedFiles();
+
+  return exitCode;
+}
+
+std::set<std::string>
+CompilationManager::scanDirectory(const std::string &dir) const {
+  // Lists the non-directory entries directly inside `dir` (no recursion).
+  // An iteration error ends the scan early without being reported.
+  std::set<std::string> entries;
+  std::error_code EC;
+  sys::fs::directory_iterator it(dir, EC), end;
+  for (; it != end && !EC; it.increment(EC))
+    if (it->type() != sys::fs::file_type::directory_file)
+      entries.insert(it->path());
+  return entries;
+}
+
+void CompilationManager::collectGeneratedFiles(
+    const std::set<std::string> &existingFiles,
+    std::vector<std::unique_ptr<CompilationUnit>> &units) {
+  // Registers every collectable file found in tempDir_, then moves files that
+  // appeared in the initial working directory since `existingFiles` was
+  // taken into tempDir_ and registers those as well.
+  FileClassifier classifier;
+  // All files are attributed to the first unit. With no units the files are
+  // still classified (and leaked ones moved), just not registered.
+  CompilationUnit *const owner = units.empty() ? nullptr : units[0].get();
+
+  // Collect files from temp directory
+  std::error_code EC;
+  for (sys::fs::recursive_directory_iterator DI(tempDir_, EC), DE;
+       DI != DE && !EC; DI.increment(EC)) {
+    if (DI->type() == sys::fs::file_type::directory_file)
+      continue;
+    const std::string filePath = DI->path();
+    if (!classifier.shouldCollect(filePath))
+      continue;
+    auto classification = classifier.classifyFile(filePath);
+    if (owner)
+      owner->addGeneratedFile(classification.category, filePath);
+  }
+
+  // Also check for files that leaked into source directory
+  for (const std::string &file : scanDirectory(initialWorkingDir_)) {
+    if (existingFiles.count(file) != 0 || !classifier.shouldCollect(file))
+      continue;
+    auto classification = classifier.classifyFile(file);
+
+    // Move leaked file to temp directory
+    const std::string destPath =
+        tempDir_ + "/" + sys::path::filename(file).str();
+    // NOTE(review): if moveFile returns llvm::Error, a failure is left
+    // unhandled here — confirm its return type.
+    if (!FileManager::moveFile(file, destPath) && owner)
+      owner->addGeneratedFile(classification.category, destPath);
+  }
+}
+
+Error CompilationManager::organizeOutput(
+    const std::vector<std::unique_ptr<CompilationUnit>> &units) {
+  // Copies every unit's registered files to
+  // <outputDir>/<unit name>/<category>/<file name>. Problems are at most
+  // logged in verbose mode; the function always returns success.
+
+  // Resolve output directory as absolute path from initial working directory
+  SmallString<256> outputDirPath;
+  if (!sys::path::is_absolute(config_.getOutputDir())) {
+    outputDirPath = initialWorkingDir_;
+    sys::path::append(outputDirPath, config_.getOutputDir());
+  } else {
+    outputDirPath = config_.getOutputDir();
+  }
+  const std::string outputDir = outputDirPath.str().str();
+
+  if (config_.getVerbose())
+    outs() << "Output directory: " << outputDir << "\n";
+
+  // Move collected files to organized structure
+  // (the files are copied; the originals stay in the temp directory)
+  for (const auto &unit : units) {
+    const std::string unitDir = outputDir + "/" + unit->getName();
+
+    // Start from a clean unit directory; a failed removal is only a warning.
+    if (sys::fs::exists(unitDir) && sys::fs::remove_directories(unitDir) &&
+        config_.getVerbose())
+      errs() << "Warning: Could not remove existing unit directory: "
+             << unitDir << "\n";
+
+    // Create fresh unit directory
+    if (sys::fs::create_directories(unitDir))
+      continue; // Skip if we can't create the directory
+
+    // One subdirectory per category, one copy per registered file.
+    for (const auto &category : unit->getAllGeneratedFiles()) {
+      const std::string categoryDir = unitDir + "/" + category.first;
+      sys::fs::create_directories(categoryDir);
+
+      for (const auto &file : category.second) {
+        const std::string destFile =
+            categoryDir + "/" + sys::path::filename(file).str();
+        // NOTE(review): if copyFile returns llvm::Error, Err is never
+        // consumed here — confirm its return type.
+        if (auto Err = FileManager::copyFile(file, destFile)) {
+          if (config_.getVerbose())
+            errs() << "Failed to copy " << file << " to " << destFile << "\n";
+        }
+      }
+    }
+  }
+
+  return Error::success();
+}
+
+void CompilationManager::cleanupLeakedFiles() {
+  // Deletes optimization-remark and profile files that lie directly in the
+  // initial working directory.
+  //
+  // NOTE(review): nothing here compares against the pre-build directory
+  // snapshot, so matching files that already existed before the build are
+  // deleted as well — confirm that this is intended.
+  const auto isLeakCandidate = [](StringRef name) {
+    // Remove optimization remarks files that leaked
+    if (name.ends_with(".opt.yaml") || name.ends_with(".opt.yml"))
+      return true;
+    // Remove profile files that leaked
+    return name.ends_with(".profraw") || name.ends_with(".profdata");
+  };
+
+  // The suffix groups are disjoint, so one removal and one log line per file
+  // matches what two separate checks would do.
+  for (const std::string &file : scanDirectory(initialWorkingDir_)) {
+    if (!isLeakCandidate(sys::path::filename(file)))
+      continue;
+    sys::fs::remove(file);
+    if (config_.getVerbose())
+      outs() << "Cleaned up leaked file: " << file << "\n";
+  }
+}
+
+} // namespace advisor
+} // namespace llvm
diff --git a/llvm/tools/llvm-advisor/src/Core/CompilationManager.h b/llvm/tools/llvm-advisor/src/Core/CompilationManager.h
new file mode 100644
index 0000000000000..5256042a8c464
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/CompilationManager.h
@@ -0,0 +1,45 @@
+#ifndef LLVM_ADVISOR_COMPILATION_MANAGER_H
+#define LLVM_ADVISOR_COMPILATION_MANAGER_H
+
+#include "../Config/AdvisorConfig.h"
+#include "../Utils/FileClassifier.h"
+#include "BuildExecutor.h"
+#include "CompilationUnit.h"
+#include "llvm/Support/Error.h"
+#include <memory>
+#include <set>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+// Coordinates one instrumented build: runs it and organizes its artifacts.
+class CompilationManager {
+public:
+  explicit CompilationManager(const AdvisorConfig &config);
+  ~CompilationManager();
+  // The destructor deletes tempDir_; a copy would delete it a second time.
+  CompilationManager(const CompilationManager &) = delete;
+  CompilationManager &operator=(const CompilationManager &) = delete;
+
+  Expected<int> executeWithDataCollection(const std::string &compiler,
+                                          const std::vector<std::string> &args);
+
+private:
+  std::set<std::string> scanDirectory(const std::string &dir) const;
+  void
+  collectGeneratedFiles(const std::set<std::string> &existingFiles,
+                        std::vector<std::unique_ptr<CompilationUnit>> &units);
+  Error
+  organizeOutput(const std::vector<std::unique_ptr<CompilationUnit>> &units);
+  void cleanupLeakedFiles();
+
+  const AdvisorConfig &config_; // not owned; must outlive this object
+  BuildExecutor buildExecutor_;
+  std::string tempDir_;           // scratch dir; removed unless temps are kept
+  std::string initialWorkingDir_; // current directory at construction time
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
>From a4ab9b9a0ae60f43d78663cc94c8ef5bf78a388e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Sun, 13 Jul 2025 06:38:54 +0200
Subject: [PATCH 7/9] [llvm-advisor] Add support for collecting extra build
outputs
Adds helpers to run the compilation with extra flags to collect IR,
assembly, AST dumps, include trees, debug info, and other data.
---
.../llvm-advisor/src/Core/DataExtractor.cpp | 367 ++++++++++++++++++
.../llvm-advisor/src/Core/DataExtractor.h | 44 +++
2 files changed, 411 insertions(+)
create mode 100644 llvm/tools/llvm-advisor/src/Core/DataExtractor.cpp
create mode 100644 llvm/tools/llvm-advisor/src/Core/DataExtractor.h
diff --git a/llvm/tools/llvm-advisor/src/Core/DataExtractor.cpp b/llvm/tools/llvm-advisor/src/Core/DataExtractor.cpp
new file mode 100644
index 0000000000000..4d709e4a6d51c
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/DataExtractor.cpp
@@ -0,0 +1,367 @@
+#include "DataExtractor.h"
+#include "../Utils/ProcessRunner.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+namespace llvm {
+namespace advisor {
+// Keeps a reference to config; the configuration must outlive the extractor.
+DataExtractor::DataExtractor(const AdvisorConfig &config) : config_(config) {}
+
+Error DataExtractor::extractAllData(CompilationUnit &unit,
+                                    const std::string &tempDir) {
+  // Runs every extraction step for `unit`, writing below `tempDir`. The first
+  // step that reports an error ends the sequence and its error is returned.
+  if (config_.getVerbose())
+    outs() << "Extracting data for unit: " << unit.getName() << "\n";
+
+  // Create extraction subdirectories
+  // (the results of create_directories are not checked)
+  for (const char *subdir : {"ir", "assembly", "ast", "preprocessed",
+                             "include-tree", "debug", "static-analyzer"})
+    sys::fs::create_directories(tempDir + "/" + subdir);
+
+  // The steps, in the order in which they run. All of them share one
+  // signature, so they can be driven from a table of member pointers.
+  using Step = Error (DataExtractor::*)(CompilationUnit &, const std::string &);
+  const Step steps[] = {
+      &DataExtractor::extractIR,
+      &DataExtractor::extractAssembly,
+      &DataExtractor::extractAST,
+      &DataExtractor::extractPreprocessed,
+      &DataExtractor::extractIncludeTree,
+      &DataExtractor::extractDebugInfo,
+      &DataExtractor::extractStaticAnalysis,
+      &DataExtractor::extractMacroExpansion,
+      &DataExtractor::extractCompilationPhases,
+  };
+
+  for (const Step step : steps) {
+    if (Error Err = (this->*step)(unit, tempDir))
+      return Err;
+  }
+
+  // Every step succeeded.
+  return Error::success();
+}
+// Returns the unit's compile flags that every extraction run should reuse.
+std::vector<std::string>
+DataExtractor::getBaseCompilerArgs(const CompilationUnitInfo &unitInfo) const {
+  std::vector<std::string> baseArgs;
+  const auto &flags = unitInfo.compileFlags;
+  for (size_t i = 0; i < flags.size(); ++i) {
+    const StringRef flag(flags[i]);
+    const auto has = [flag](const char *p) { return flag.starts_with(p); };
+    const bool wanted = has("-I") || has("-D") || has("-U") || has("-std=") ||
+                        has("-m") || has("-f") || has("-W") || has("-O");
+    // Skip problematic flags for extraction
+    const bool skipped = has("-fsave-optimization-record") ||
+                         has("-fprofile-instr-generate") ||
+                         has("-fcoverage-mapping") ||
+                         has("-foptimization-record-file");
+    if (!wanted || skipped)
+      continue;
+    baseArgs.push_back(flags[i]);
+    // "-I dir", "-D NAME" and "-U NAME" carry their value in the next
+    // argument; without it the option would swallow whatever follows.
+    if ((flag == "-I" || flag == "-D" || flag == "-U") && i + 1 < flags.size())
+      baseArgs.push_back(flags[++i]);
+  }
+  return baseArgs;
+}
+
+Error DataExtractor::extractIR(CompilationUnit &unit,
+                               const std::string &tempDir) {
+  // Writes <tempDir>/ir/<stem>.ll for each non-header source; a source that
+  // fails to compile is skipped, so the result is always success.
+  for (const auto &source : unit.getInfo().sources) {
+    if (source.isHeader)
+      continue;
+    std::string outputFile =
+        tempDir + "/ir/" + sys::path::stem(source.path).str() + ".ll";
+
+    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
+    baseArgs.push_back("-emit-llvm");
+    baseArgs.push_back("-S");
+    baseArgs.push_back("-o");
+    baseArgs.push_back(outputFile);
+    baseArgs.push_back(source.path);
+
+    if (auto Err = runCompilerWithFlags(baseArgs)) {
+      // An llvm::Error has to be consumed even when nothing is printed.
+      consumeError(std::move(Err));
+      if (config_.getVerbose())
+        errs() << "Failed to extract IR for " << source.path << "\n";
+      continue;
+    }
+    if (sys::fs::exists(outputFile))
+      unit.addGeneratedFile("ir", outputFile);
+  }
+  return Error::success();
+}
+
+Error DataExtractor::extractAssembly(CompilationUnit &unit,
+                                     const std::string &tempDir) {
+  // Writes <tempDir>/assembly/<stem>.s for each non-header source; a source
+  // that fails to compile is skipped, so the result is always success.
+  for (const auto &source : unit.getInfo().sources) {
+    if (source.isHeader)
+      continue;
+    std::string outputFile =
+        tempDir + "/assembly/" + sys::path::stem(source.path).str() + ".s";
+
+    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
+    baseArgs.push_back("-S");
+    baseArgs.push_back("-o");
+    baseArgs.push_back(outputFile);
+    baseArgs.push_back(source.path);
+
+    if (auto Err = runCompilerWithFlags(baseArgs)) {
+      // An llvm::Error has to be consumed even when nothing is printed.
+      consumeError(std::move(Err));
+      if (config_.getVerbose())
+        errs() << "Failed to extract assembly for " << source.path << "\n";
+      continue;
+    }
+    if (sys::fs::exists(outputFile))
+      unit.addGeneratedFile("assembly", outputFile);
+  }
+  return Error::success();
+}
+
+Error DataExtractor::extractAST(CompilationUnit &unit,
+                                const std::string &tempDir) {
+  // Saves clang's AST dump (its stdout) as <tempDir>/ast/<stem>.ast.
+  for (const auto &source : unit.getInfo().sources) {
+    if (source.isHeader)
+      continue;
+    std::string outputFile =
+        tempDir + "/ast/" + sys::path::stem(source.path).str() + ".ast";
+    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
+    // -ast-dump is a cc1 option; the driver only accepts it via -Xclang.
+    baseArgs.push_back("-Xclang");
+    baseArgs.push_back("-ast-dump");
+    baseArgs.push_back("-fsyntax-only");
+    baseArgs.push_back(source.path);
+    auto result = ProcessRunner::run(config_.getToolPath("clang"), baseArgs,
+                                     config_.getTimeout());
+    if (result && result->exitCode == 0) {
+      std::error_code EC;
+      raw_fd_ostream OS(outputFile, EC);
+      if (!EC) {
+        OS << result->stdout;
+        unit.addGeneratedFile("ast", outputFile);
+      }
+    }
+  }
+  return Error::success();
+}
+
+Error DataExtractor::extractPreprocessed(CompilationUnit &unit,
+                                         const std::string &tempDir) {
+  // Writes the preprocessed form of each non-header source to
+  // <tempDir>/preprocessed/<stem>.ii (language "C++") or <stem>.i (otherwise).
+  for (const auto &source : unit.getInfo().sources) {
+    if (source.isHeader)
+      continue;
+    std::string ext = (source.language == "C++") ? ".ii" : ".i";
+    std::string outputFile =
+        tempDir + "/preprocessed/" + sys::path::stem(source.path).str() + ext;
+
+    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
+    baseArgs.push_back("-E");
+    baseArgs.push_back("-o");
+    baseArgs.push_back(outputFile);
+    baseArgs.push_back(source.path);
+
+    if (auto Err = runCompilerWithFlags(baseArgs)) {
+      // An llvm::Error has to be consumed even when nothing is printed.
+      consumeError(std::move(Err));
+      if (config_.getVerbose())
+        errs() << "Failed to extract preprocessed for " << source.path << "\n";
+      continue;
+    }
+    if (sys::fs::exists(outputFile))
+      unit.addGeneratedFile("preprocessed", outputFile);
+  }
+  return Error::success();
+}
+
+Error DataExtractor::extractIncludeTree(CompilationUnit &unit,
+                                        const std::string &tempDir) {
+  // Stores what clang prints on stderr for -H -fsyntax-only as
+  // <tempDir>/include-tree/<stem>.include.txt, one file per non-header source.
+  for (const auto &source : unit.getInfo().sources) {
+    if (source.isHeader)
+      continue;
+    const std::string stem = sys::path::stem(source.path).str();
+    const std::string outputFile =
+        tempDir + "/include-tree/" + stem + ".include.txt";
+    auto includeArgs = getBaseCompilerArgs(unit.getInfo());
+    includeArgs.push_back("-H");
+    includeArgs.push_back("-fsyntax-only");
+    includeArgs.push_back(source.path);
+
+    auto result = ProcessRunner::run(config_.getToolPath("clang"), includeArgs,
+                                     config_.getTimeout());
+    if (!result || result->stderr.empty())
+      continue;
+    std::error_code EC;
+    raw_fd_ostream OS(outputFile, EC);
+    if (EC)
+      continue;
+    OS << result->stderr; // Include tree goes to stderr
+    unit.addGeneratedFile("include-tree", outputFile);
+  }
+  return Error::success();
+}
+
+Error DataExtractor::extractDebugInfo(CompilationUnit &unit,
+                                      const std::string &tempDir) {
+  // Compiles each non-header source with -g into <tempDir>/debug/<stem>.o and
+  // stores the llvm-dwarfdump output for that object as <stem>.debug.txt.
+  // Per-file failures are skipped, so the result is always success.
+  for (const auto &source : unit.getInfo().sources) {
+    if (source.isHeader)
+      continue;
+    const std::string stem = sys::path::stem(source.path).str();
+    const std::string outputFile = tempDir + "/debug/" + stem + ".debug.txt";
+    const std::string objectFile = tempDir + "/debug/" + stem + ".o";
+    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
+    baseArgs.push_back("-g");
+    baseArgs.push_back("-c");
+    baseArgs.push_back("-o");
+    baseArgs.push_back(objectFile);
+    baseArgs.push_back(source.path);
+
+    if (auto Err = runCompilerWithFlags(baseArgs)) {
+      // An llvm::Error has to be consumed even when nothing is printed.
+      consumeError(std::move(Err));
+      if (config_.getVerbose())
+        errs() << "Failed to extract debug info for " << source.path << "\n";
+      continue;
+    }
+    // Extract DWARF info using llvm-dwarfdump
+    if (!sys::fs::exists(objectFile))
+      continue;
+    std::vector<std::string> dwarfArgs = {objectFile};
+    auto result =
+        ProcessRunner::run("llvm-dwarfdump", dwarfArgs, config_.getTimeout());
+    if (!result || result->exitCode != 0)
+      continue;
+    std::error_code EC;
+    raw_fd_ostream OS(outputFile, EC);
+    if (!EC) {
+      OS << result->stdout;
+      unit.addGeneratedFile("debug", outputFile);
+    }
+  }
+  return Error::success();
+}
+
+Error DataExtractor::extractStaticAnalysis(CompilationUnit &unit,
+                                           const std::string &tempDir) {
+  // Runs clang --analyze with text output on each non-header source and saves
+  // both output streams, whatever the exit code, as <stem>.analysis.txt.
+  for (const auto &source : unit.getInfo().sources) {
+    if (source.isHeader)
+      continue;
+    const std::string stem = sys::path::stem(source.path).str();
+    const std::string outputFile =
+        tempDir + "/static-analyzer/" + stem + ".analysis.txt";
+
+    auto analyzerArgs = getBaseCompilerArgs(unit.getInfo());
+    analyzerArgs.push_back("--analyze");
+    analyzerArgs.push_back("-Xanalyzer");
+    analyzerArgs.push_back("-analyzer-output=text");
+    analyzerArgs.push_back(source.path);
+    auto result = ProcessRunner::run(config_.getToolPath("clang"), analyzerArgs,
+                                     config_.getTimeout());
+    if (!result)
+      continue;
+    std::error_code EC;
+    raw_fd_ostream OS(outputFile, EC);
+    if (EC)
+      continue;
+    OS << "STDOUT:\n" << result->stdout << "\nSTDERR:\n" << result->stderr;
+    unit.addGeneratedFile("static-analyzer", outputFile);
+  }
+  return Error::success();
+}
+
+Error DataExtractor::extractMacroExpansion(CompilationUnit &unit,
+                                           const std::string &tempDir) {
+  // Runs -E -dM (macro definitions) on each non-header source; the output in
+  // <tempDir>/preprocessed/ is registered under "macro-expansion".
+  for (const auto &source : unit.getInfo().sources) {
+    if (source.isHeader)
+      continue;
+    std::string outputFile =
+        tempDir + "/preprocessed/" + sys::path::stem(source.path).str() +
+        ".macro-expanded" + ((source.language == "C++") ? ".ii" : ".i");
+
+    auto baseArgs = getBaseCompilerArgs(unit.getInfo());
+    baseArgs.push_back("-E");
+    baseArgs.push_back("-dM"); // Show macro definitions
+    baseArgs.push_back("-o");
+    baseArgs.push_back(outputFile);
+    baseArgs.push_back(source.path);
+
+    if (auto Err = runCompilerWithFlags(baseArgs)) {
+      // An llvm::Error has to be consumed even when nothing is printed.
+      consumeError(std::move(Err));
+      if (config_.getVerbose())
+        errs() << "Failed to extract macro expansion for " << source.path
+               << "\n";
+      continue;
+    }
+    if (sys::fs::exists(outputFile))
+      unit.addGeneratedFile("macro-expansion", outputFile);
+  }
+  return Error::success();
+}
+
+Error DataExtractor::extractCompilationPhases(CompilationUnit &unit,
+                                              const std::string &tempDir) {
+  // Saves clang's -v output (stderr) for each non-header source as
+  // <tempDir>/debug/<stem>.phases.txt, whatever the exit code.
+  for (const auto &source : unit.getInfo().sources) {
+    if (source.isHeader)
+      continue;
+    const std::string outputFile =
+        tempDir + "/debug/" + sys::path::stem(source.path).str() + ".phases.txt";
+
+    auto verboseArgs = getBaseCompilerArgs(unit.getInfo());
+    verboseArgs.push_back("-v"); // Verbose compilation phases
+    verboseArgs.push_back("-fsyntax-only");
+    verboseArgs.push_back(source.path);
+    auto result = ProcessRunner::run(config_.getToolPath("clang"), verboseArgs,
+                                     config_.getTimeout());
+    if (!result)
+      continue;
+    std::error_code EC;
+    raw_fd_ostream OS(outputFile, EC);
+    if (EC)
+      continue;
+    OS << "COMPILATION PHASES:\n"
+       << result->stderr; // Verbose output goes to stderr
+    unit.addGeneratedFile("compilation-phases", outputFile);
+  }
+  return Error::success();
+}
+
+Error DataExtractor::runCompilerWithFlags(
+    const std::vector<std::string> &args) {
+  // Success only if clang could be run and exited with status 0.
+  auto result = ProcessRunner::run(config_.getToolPath("clang"), args,
+                                   config_.getTimeout());
+  if (result && result->exitCode == 0)
+    return Error::success();
+  return createStringError(std::make_error_code(std::errc::io_error),
+                           "Compiler failed");
+}
+
+} // namespace advisor
+} // namespace llvm
diff --git a/llvm/tools/llvm-advisor/src/Core/DataExtractor.h b/llvm/tools/llvm-advisor/src/Core/DataExtractor.h
new file mode 100644
index 0000000000000..7564660ed05b9
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Core/DataExtractor.h
@@ -0,0 +1,44 @@
+#ifndef LLVM_ADVISOR_DATA_EXTRACTOR_H
+#define LLVM_ADVISOR_DATA_EXTRACTOR_H
+
+#include "../Config/AdvisorConfig.h"
+#include "CompilationUnit.h"
+#include "llvm/Support/Error.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+
+/// Runs the compiler in several modes over the sources of a CompilationUnit
+/// and registers the files it produces on that unit.
+class DataExtractor {
+public:
+ // NOTE(review): this constructor is implicit, whereas UnitDetector's
+ // equivalent constructor is declared `explicit` — consider aligning them.
+ DataExtractor(const AdvisorConfig &config);
+
+ // Public entry point. Presumably drives the private extract* steps below;
+ // its definition is not shown alongside this header — confirm.
+ Error extractAllData(CompilationUnit &unit, const std::string &tempDir);
+
+private:
+ // Argument list that each extraction step starts from before appending its
+ // own mode flags, output path and source path.
+ std::vector<std::string>
+ getBaseCompilerArgs(const CompilationUnitInfo &unitInfo) const;
+
+ // One step per kind of artifact. Each receives the unit to annotate and the
+ // root directory under which its output files are written.
+ Error extractIR(CompilationUnit &unit, const std::string &tempDir);
+ Error extractAssembly(CompilationUnit &unit, const std::string &tempDir);
+ Error extractAST(CompilationUnit &unit, const std::string &tempDir);
+ Error extractPreprocessed(CompilationUnit &unit, const std::string &tempDir);
+ Error extractIncludeTree(CompilationUnit &unit, const std::string &tempDir);
+ Error extractDebugInfo(CompilationUnit &unit, const std::string &tempDir);
+ Error extractStaticAnalysis(CompilationUnit &unit,
+ const std::string &tempDir);
+ Error extractMacroExpansion(CompilationUnit &unit,
+ const std::string &tempDir);
+ Error extractCompilationPhases(CompilationUnit &unit,
+ const std::string &tempDir);
+
+ // Runs the configured "clang" tool with `args`; fails when the process
+ // cannot be run or exits with a non-zero code.
+ Error runCompilerWithFlags(const std::vector<std::string> &args);
+
+ // Held by reference, not owned: the referenced configuration has to stay
+ // alive for as long as this extractor is used.
+ const AdvisorConfig &config_;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
>From a15a9782876cf1e21f525929cadbcebf93d7223d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Sun, 13 Jul 2025 06:44:04 +0200
Subject: [PATCH 8/9] [llvm-advisor] Add support for detecting compilation
units
Adds logic to scan the original compiler arguments, find source files,
figure out compile flags and output paths, and create a description
of each compilation unit.
This is the entry point for the analysis pipeline.
---
.../src/Detection/UnitDetector.cpp | 114 ++++++++++++++++++
.../llvm-advisor/src/Detection/UnitDetector.h | 35 ++++++
2 files changed, 149 insertions(+)
create mode 100644 llvm/tools/llvm-advisor/src/Detection/UnitDetector.cpp
create mode 100644 llvm/tools/llvm-advisor/src/Detection/UnitDetector.h
diff --git a/llvm/tools/llvm-advisor/src/Detection/UnitDetector.cpp b/llvm/tools/llvm-advisor/src/Detection/UnitDetector.cpp
new file mode 100644
index 0000000000000..16d24f7a61d8f
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Detection/UnitDetector.cpp
@@ -0,0 +1,114 @@
+#include "UnitDetector.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/xxhash.h"
+
+namespace llvm {
+namespace advisor {
+
+// Binds the detector to `config` by reference (config_ is a reference member,
+// so no copy is made); classifier_ is default-initialized.
+UnitDetector::UnitDetector(const AdvisorConfig &config) : config_(config) {}
+
+/// Returns true when \p arg is a positional argument (non-empty and not
+/// starting with '-') whose extension is one of .c, .cpp, .cc, .cxx or .C.
+///
+/// Shared by detectUnits() and findSourceFiles() so the list of sources and
+/// the list of remaining compile flags are always split by the same rule.
+static bool isSourceFileArg(StringRef arg) {
+  if (arg.empty() || arg[0] == '-')
+    return false;
+
+  StringRef extension = sys::path::extension(arg);
+  return extension == ".c" || extension == ".cpp" || extension == ".cc" ||
+         extension == ".cxx" || extension == ".C";
+}
+
+/// Builds the description of the compilation unit for one compiler
+/// invocation.
+///
+/// All source files found in \p args are grouped into a single unit. Every
+/// other argument is kept, in its original order, as a compile flag, and
+/// extractBuildInfo() then fills in the remaining build information.
+///
+/// \param compiler Compiler executable of the invocation (currently unused).
+/// \param args     Arguments that were passed to the compiler.
+/// \returns A vector holding exactly one unit, or an invalid_argument error
+///          when \p args names no source file.
+Expected<std::vector<CompilationUnitInfo>>
+UnitDetector::detectUnits(const std::string &compiler,
+                          const std::vector<std::string> &args) {
+
+  auto sources = findSourceFiles(args);
+  if (sources.empty()) {
+    return createStringError(std::make_error_code(std::errc::invalid_argument),
+                             "No source files found");
+  }
+
+  CompilationUnitInfo unit;
+  unit.name = generateUnitName(sources);
+  unit.sources = sources;
+
+  // Keep the original arguments as compile flags, minus the source files.
+  for (const auto &arg : args) {
+    if (!isSourceFileArg(arg))
+      unit.compileFlags.push_back(arg);
+  }
+
+  // Extract output files and features
+  extractBuildInfo(args, unit);
+
+  return std::vector<CompilationUnitInfo>{unit};
+}
+
+/// Collects the source files named in \p args, in argument order.
+///
+/// Each entry records the path as given, the language reported by the file
+/// classifier, and isHeader = false.
+std::vector<SourceFile>
+UnitDetector::findSourceFiles(const std::vector<std::string> &args) const {
+  std::vector<SourceFile> sources;
+
+  for (const auto &arg : args) {
+    if (!isSourceFileArg(arg))
+      continue;
+
+    SourceFile source;
+    source.path = arg;
+    source.language = classifier_.getLanguage(arg);
+    source.isHeader = false;
+    sources.push_back(source);
+  }
+
+  return sources;
+}
+
+/// Derives build information for \p unit from the compiler arguments.
+///
+/// - `-o <path>`: stored as outputObject when <path> ends in ".o", otherwise
+///   as outputExecutable.
+/// - Any other argument containing "openmp", "offload" or "cuda" sets
+///   hasOffloading. This is a substring heuristic.
+/// - `-march=<arch>`: <arch> is stored as targetArch.
+///
+/// \param args Arguments that were passed to the compiler.
+/// \param unit Unit description that is updated in place.
+void UnitDetector::extractBuildInfo(const std::vector<std::string> &args,
+                                    CompilationUnitInfo &unit) {
+  for (size_t i = 0; i < args.size(); ++i) {
+    const auto &arg = args[i];
+
+    if (arg == "-o" && i + 1 < args.size()) {
+      // Advance past the output path as well: it is a file name, not a flag,
+      // so it must not take part in the offloading scan below (an output
+      // called "cuda_test" would otherwise mark the unit as offloading).
+      const std::string &output = args[++i];
+      if (sys::path::extension(output) == ".o") {
+        unit.outputObject = output;
+      } else {
+        unit.outputExecutable = output;
+      }
+      continue;
+    }
+
+    if (arg.find("openmp") != std::string::npos ||
+        arg.find("offload") != std::string::npos ||
+        arg.find("cuda") != std::string::npos) {
+      unit.hasOffloading = true;
+    }
+
+    if (StringRef(arg).starts_with("-march=")) {
+      unit.targetArch = arg.substr(7); // 7 == strlen("-march=")
+    }
+  }
+}
+
+/// Produces a name for the unit made of \p sources.
+///
+/// The name is the stem of the first source file. When there is more than one
+/// source, "_<n>" is appended, where <n> (0..9999) is derived from a hash of
+/// all source paths, to tell apart units that share a first file.
+///
+/// \returns "unknown" when \p sources is empty.
+std::string
+UnitDetector::generateUnitName(const std::vector<SourceFile> &sources) const {
+  if (sources.empty())
+    return "unknown";
+
+  // Use first source file name as base
+  std::string baseName = sys::path::stem(sources[0].path).str();
+
+  // Add hash for uniqueness when multiple sources
+  if (sources.size() > 1) {
+    std::string combined;
+    for (const auto &source : sources) {
+      combined += source.path;
+      // NUL cannot occur inside a path, so it keeps {"ab", "c"} and
+      // {"a", "bc"} from hashing the same byte sequence.
+      combined += '\0';
+    }
+    // llvm::hash_value() is documented as not stable across executions, which
+    // would let the same build get a different unit name on every run.
+    // xxh3_64bits() depends only on the input bytes.
+    baseName += "_" + std::to_string(xxh3_64bits(StringRef(combined)) % 10000);
+  }
+
+  return baseName;
+}
+
+} // namespace advisor
+} // namespace llvm
diff --git a/llvm/tools/llvm-advisor/src/Detection/UnitDetector.h b/llvm/tools/llvm-advisor/src/Detection/UnitDetector.h
new file mode 100644
index 0000000000000..8ad998d3c4e7a
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/Detection/UnitDetector.h
@@ -0,0 +1,35 @@
+#ifndef LLVM_ADVISOR_UNIT_DETECTOR_H
+#define LLVM_ADVISOR_UNIT_DETECTOR_H
+
+#include "../Config/AdvisorConfig.h"
+#include "../Core/CompilationUnit.h"
+#include "../Utils/FileClassifier.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+namespace llvm {
+namespace advisor {
+
+/// Turns the argument list of one compiler invocation into a description of
+/// the compilation unit(s) it builds.
+class UnitDetector {
+public:
+ explicit UnitDetector(const AdvisorConfig &config);
+
+ // Returns the units described by `args`, or an error when `args` names no
+ // source file.
+ Expected<std::vector<CompilationUnitInfo>>
+ detectUnits(const std::string &compiler,
+ const std::vector<std::string> &args);
+
+private:
+ // Positional arguments ending in .c, .cpp, .cc, .cxx or .C, in order.
+ std::vector<SourceFile>
+ findSourceFiles(const std::vector<std::string> &args) const;
+ // Fills the output path, offloading flag and target architecture of `unit`
+ // from `args`.
+ void extractBuildInfo(const std::vector<std::string> &args,
+ CompilationUnitInfo &unit);
+ // Name based on the first source's stem; "unknown" for an empty list.
+ std::string generateUnitName(const std::vector<SourceFile> &sources) const;
+
+ // Held by reference, not owned: the referenced configuration has to stay
+ // alive for as long as this detector is used.
+ const AdvisorConfig &config_;
+ // Supplies the language recorded for each detected source file.
+ FileClassifier classifier_;
+};
+
+} // namespace advisor
+} // namespace llvm
+
+#endif
>From 0a0cf2157b931314fadbbc072ce537f5f4f9a7c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20C=C3=A1rdenas?= <miguelecsx at gmail.com>
Date: Sun, 13 Jul 2025 06:48:06 +0200
Subject: [PATCH 9/9] [llvm-advisor] Add main command-line driver
Adds the command-line front-end that handles advisor options, finds
the compiler command, creates the configuration, and starts the build
data collection.
---
llvm/tools/llvm-advisor/src/llvm-advisor.cpp | 111 +++++++++++++++++++
1 file changed, 111 insertions(+)
create mode 100644 llvm/tools/llvm-advisor/src/llvm-advisor.cpp
diff --git a/llvm/tools/llvm-advisor/src/llvm-advisor.cpp b/llvm/tools/llvm-advisor/src/llvm-advisor.cpp
new file mode 100644
index 0000000000000..01c28ba53b95b
--- /dev/null
+++ b/llvm/tools/llvm-advisor/src/llvm-advisor.cpp
@@ -0,0 +1,111 @@
+#include "Config/AdvisorConfig.h"
+#include "Core/CompilationManager.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::advisor;
+
+// llvm-advisor's own command-line options. main() hands the option parser
+// only the arguments that precede the compiler command, so these never see
+// the wrapped compiler's flags.
+
+// Path of a configuration file to load; nothing is loaded when left empty.
+static cl::opt<std::string> ConfigFile("config", cl::desc("Configuration file"),
+ cl::value_desc("filename"));
+// Directory for collected data; main() falls back to ".llvm-advisor" when
+// this is left empty.
+static cl::opt<std::string> OutputDir("output-dir",
+ cl::desc("Output directory"),
+ cl::value_desc("directory"));
+static cl::opt<bool> Verbose("verbose", cl::desc("Verbose output"));
+static cl::opt<bool> KeepTemps("keep-temps", cl::desc("Keep temporary files"));
+// Negative flag: main() passes its inverse to AdvisorConfig::setRunProfiler.
+static cl::opt<bool> NoProfiler("no-profiler", cl::desc("Disable profiler"));
+
+/// Entry point: `llvm-advisor [options] <compiler> [compiler-args...]`.
+///
+/// Leading arguments that look like options are parsed as llvm-advisor
+/// options. The first argument that does not look like an option is the
+/// compiler, and everything after it is forwarded unchanged.
+///
+/// \returns The value produced by
+///          CompilationManager::executeWithDataCollection, or 1 when no
+///          compiler was given, the configuration could not be loaded, the
+///          output directory could not be created, or execution failed.
+int main(int argc, char **argv) {
+  InitLLVM X(argc, argv);
+
+  // llvm-advisor options whose value is passed as a separate argument.
+  // cl::opt accepts both the "-name" and "--name" spellings, so both are
+  // listed; otherwise "-config file" would mistake "file" for the compiler.
+  auto takesSeparateValue = [](StringRef arg) {
+    return arg == "--config" || arg == "-config" || arg == "--output-dir" ||
+           arg == "-output-dir";
+  };
+
+  // Parse llvm-advisor options until we find the compiler
+  std::vector<const char *> advisorArgs;
+  advisorArgs.push_back(argv[0]);
+
+  int compilerArgStart = 1;
+  bool foundCompiler = false;
+
+  for (int i = 1; i < argc; ++i) {
+    StringRef arg(argv[i]);
+
+    // Anything that is not "-x..." ends the option prefix; a bare "-" is not
+    // an option either.
+    if (!arg.starts_with("-") || arg.size() == 1) {
+      compilerArgStart = i;
+      foundCompiler = true;
+      break;
+    }
+
+    advisorArgs.push_back(argv[i]);
+    if (takesSeparateValue(arg) && i + 1 < argc &&
+        !StringRef(argv[i + 1]).starts_with("-")) {
+      advisorArgs.push_back(argv[++i]);
+    }
+  }
+
+  if (!foundCompiler) {
+    errs() << "Error: No compiler command provided.\n";
+    errs() << "Usage: llvm-advisor [options] <compiler> [compiler-args...]\n";
+    return 1;
+  }
+
+  // Parse llvm-advisor options
+  int advisorArgc = static_cast<int>(advisorArgs.size());
+  cl::ParseCommandLineOptions(advisorArgc,
+                              const_cast<char **>(advisorArgs.data()),
+                              "LLVM Compilation Advisor");
+
+  // Extract compiler and arguments
+  std::string compiler = argv[compilerArgStart];
+  std::vector<std::string> compilerArgs;
+  for (int i = compilerArgStart + 1; i < argc; ++i) {
+    compilerArgs.push_back(argv[i]);
+  }
+
+  // Configure advisor
+  AdvisorConfig config;
+  if (!ConfigFile.empty()) {
+    if (auto Err = config.loadFromFile(ConfigFile).takeError()) {
+      errs() << "Error loading config: " << toString(std::move(Err)) << "\n";
+      return 1;
+    }
+  }
+
+  if (!OutputDir.empty()) {
+    config.setOutputDir(OutputDir);
+  } else {
+    config.setOutputDir(".llvm-advisor"); // Default hidden directory
+  }
+
+  config.setVerbose(Verbose);
+  config.setKeepTemps(KeepTemps);
+  config.setRunProfiler(!NoProfiler);
+
+  // Create output directory
+  if (auto EC = sys::fs::create_directories(config.getOutputDir())) {
+    errs() << "Error creating output directory: " << EC.message() << "\n";
+    return 1;
+  }
+
+  if (config.getVerbose()) {
+    outs() << "LLVM Compilation Advisor\n";
+    outs() << "Compiler: " << compiler << "\n";
+    outs() << "Output: " << config.getOutputDir() << "\n";
+  }
+
+  // Execute with data collection
+  CompilationManager manager(config);
+  auto result = manager.executeWithDataCollection(compiler, compilerArgs);
+
+  if (!result) {
+    errs() << "Error: " << toString(result.takeError()) << "\n";
+    return 1;
+  }
+
+  if (config.getVerbose()) {
+    outs() << "Compilation completed (exit code: " << *result << ")\n";
+  }
+  return *result;
+}
More information about the llvm-commits
mailing list