[llvm-branch-commits] [llvm] Llvm ir2vec vocab read refactor (PR #177361)
Nishant Sachdeva via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jan 22 05:44:13 PST 2026
https://github.com/nishant-sachdeva updated https://github.com/llvm/llvm-project/pull/177361
>From 7e128fd03fc036753ef0c3a659f1b79e9303af68 Mon Sep 17 00:00:00 2001
From: nishant_sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Thu, 22 Jan 2026 19:07:42 +0530
Subject: [PATCH] Modifying llvm-ir2vec vocab reading pipeline to use
Vocabulary::fromFile instead of a full pass invocation
---
llvm/include/llvm/Analysis/IR2Vec.h | 1 +
llvm/lib/Analysis/IR2Vec.cpp | 2 +-
llvm/tools/llvm-ir2vec/lib/Utils.cpp | 18 ++++++++++--------
llvm/tools/llvm-ir2vec/lib/Utils.h | 4 ++--
llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp | 9 ++++++++-
5 files changed, 22 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h
index 2bf1c2adc0d4f..a7f88d9efdc3d 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -77,6 +77,7 @@ LLVM_ABI extern cl::opt<float> OpcWeight;
LLVM_ABI extern cl::opt<float> TypeWeight;
LLVM_ABI extern cl::opt<float> ArgWeight;
LLVM_ABI extern cl::opt<IR2VecKind> IR2VecEmbeddingKind;
+LLVM_ABI extern cl::opt<std::string> VocabFile;
/// Embedding is a datatype that wraps std::vector<double>. It provides
/// additional functionality for arithmetic and comparison operations.
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index 4c187fe9ce804..c421926d12ab1 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -40,7 +40,7 @@ namespace ir2vec {
cl::OptionCategory IR2VecCategory("IR2Vec Options");
// FIXME: Use a default vocab when not specified
-static cl::opt<std::string>
+cl::opt<std::string>
VocabFile("ir2vec-vocab-path", cl::Optional,
cl::desc("Path to the vocabulary file for IR2Vec"), cl::init(""),
cl::cat(IR2VecCategory));
diff --git a/llvm/tools/llvm-ir2vec/lib/Utils.cpp b/llvm/tools/llvm-ir2vec/lib/Utils.cpp
index a655ae069f642..c7c10ee1ff43f 100644
--- a/llvm/tools/llvm-ir2vec/lib/Utils.cpp
+++ b/llvm/tools/llvm-ir2vec/lib/Utils.cpp
@@ -41,14 +41,16 @@ namespace llvm {
namespace ir2vec {
-bool IR2VecTool::initializeVocabulary() {
- // Register and run the IR2Vec vocabulary analysis
- // The vocabulary file path is specified via --ir2vec-vocab-path global
- // option
- MAM.registerPass([&] { return PassInstrumentationAnalysis(); });
- MAM.registerPass([&] { return IR2VecVocabAnalysis(); });
- // This will throw an error if vocab is not found or invalid
- Vocab = &MAM.getResult<IR2VecVocabAnalysis>(M);
+bool IR2VecTool::initializeVocabulary(StringRef VocabPath) {
+ auto VocabOrErr = Vocabulary::fromFile(VocabPath);
+
+ if (!VocabOrErr) {
+ llvm::errs() << "Failed to load vocabulary: "
+ << toString(VocabOrErr.takeError()) << "\n";
+ return false;
+ }
+
+ Vocab = std::make_unique<Vocabulary>(std::move(*VocabOrErr));
return Vocab->isValid();
}
diff --git a/llvm/tools/llvm-ir2vec/lib/Utils.h b/llvm/tools/llvm-ir2vec/lib/Utils.h
index 34474b7808463..29e8ce4f1c0ad 100644
--- a/llvm/tools/llvm-ir2vec/lib/Utils.h
+++ b/llvm/tools/llvm-ir2vec/lib/Utils.h
@@ -87,13 +87,13 @@ class IR2VecTool {
private:
Module &M;
ModuleAnalysisManager MAM;
- const Vocabulary *Vocab = nullptr;
+ std::unique_ptr<Vocabulary> Vocab;
public:
explicit IR2VecTool(Module &M) : M(M) {}
/// Initialize the IR2Vec vocabulary analysis
- bool initializeVocabulary();
+ bool initializeVocabulary(StringRef VocabPath);
/// Generate triplets for a single function
/// Returns a TripletResult with:
diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
index d240e7c6e5201..e8d7d9b19cd8c 100644
--- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
+++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
@@ -153,7 +153,14 @@ static Error processModule(Module &M, raw_ostream &OS) {
if (EmbeddingsSubCmd) {
// Initialize vocabulary for embedding generation
// Note: Requires --ir2vec-vocab-path option to be set
- auto VocabStatus = Tool.initializeVocabulary();
+ // (the parsed value is held in the VocabFile option variable)
+ if (VocabFile.empty()) {
+ return createStringError(
+ errc::invalid_argument,
+ "IR2Vec vocabulary file path not specified; "
+ "You may need to set it using --ir2vec-vocab-path");
+ }
+ auto VocabStatus = Tool.initializeVocabulary(VocabFile);
assert(VocabStatus && "Failed to initialize IR2Vec vocabulary");
(void)VocabStatus;
More information about the llvm-branch-commits
mailing list