[llvm] [llvm-ir2vec] Added Enum for ir2vec embedding mode (PR #190466)
Nishant Sachdeva via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 4 08:46:43 PDT 2026
https://github.com/nishant-sachdeva updated https://github.com/llvm/llvm-project/pull/190466
From 5422b32972164d9ade299fb9d0b25a9eff6920fe Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Sat, 4 Apr 2026 21:01:49 +0530
Subject: [PATCH 1/2] Added Enum for ir2vec mode
---
.../bindings/ir2vec-getBBEmbMap.py | 2 +-
.../llvm-ir2vec/bindings/ir2vec-getFuncEmb.py | 2 +-
.../bindings/ir2vec-getFuncEmbMap.py | 2 +-
.../bindings/ir2vec-getFuncNames.py | 2 +-
.../bindings/ir2vec-getInstEmbMap.py | 2 +-
.../bindings/ir2vec-initEmbedding.py | 14 +++++------
llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp | 23 ++++++++++---------
7 files changed, 24 insertions(+), 23 deletions(-)
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
index 333feadc6c932..a85876c7d8e9f 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
@@ -6,7 +6,7 @@
ll_file = sys.argv[1]
vocab_path = sys.argv[2]
-tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+tool = ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
# Success case
bb_map = tool.getBBEmbMap("conditional")
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
index 61b9464c89757..782cbb4f65ae3 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
@@ -6,7 +6,7 @@
ll_file = sys.argv[1]
vocab_path = sys.argv[2]
-tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+tool = ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
# Success case
emb = tool.getFuncEmb("add")
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
index 7600d5e4a2986..4235ed079c407 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
@@ -6,7 +6,7 @@
ll_file = sys.argv[1]
vocab_path = sys.argv[2]
-tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+tool = ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
# Success case
emb_map = tool.getFuncEmbMap()
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
index 432d80e97edb9..3e3048ead6e5f 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
@@ -6,7 +6,7 @@
ll_file = sys.argv[1]
vocab_path = sys.argv[2]
-tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+tool = ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
# Success case
func_names = tool.getFuncNames()
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
index 3157ae34cfd3c..f9bd4ffcc9e5d 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
@@ -6,7 +6,7 @@
ll_file = sys.argv[1]
vocab_path = sys.argv[2]
-tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+tool = ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
# Success case
inst_map = tool.getInstEmbMap("add")
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
index c19935a0c6b7d..43c3db5d98c03 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
@@ -7,41 +7,41 @@
vocab_path = sys.argv[2]
# Success case
-tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+tool = ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
print(f"SUCCESS: {type(tool).__name__}")
# CHECK: SUCCESS: IR2VecTool
# Error: Invalid mode
try:
ir2vec.initEmbedding(filename=ll_file, mode="invalid", vocabPath=vocab_path)
-except ValueError:
+except TypeError:
print("ERROR: Invalid mode")
# CHECK: ERROR: Invalid mode
# Error: Empty vocab path
try:
- ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath="")
+ ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath="")
except ValueError:
print("ERROR: Empty vocab path")
# CHECK: ERROR: Empty vocab path
# Error: Invalid file
try:
- ir2vec.initEmbedding(filename="/bad.ll", mode="sym", vocabPath=vocab_path)
+ ir2vec.initEmbedding(filename="/bad.ll", mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
except ValueError:
print("ERROR: Invalid file")
# CHECK: ERROR: Invalid file
# Error: Empty filename
try:
- ir2vec.initEmbedding(filename="", mode="sym", vocabPath=vocab_path)
+ ir2vec.initEmbedding(filename="", mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
except ValueError:
print("ERROR: Empty filename")
# CHECK: ERROR: Empty filename
# Error: Invalid vocab file
try:
- ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath="/bad.json")
+ ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath="/bad.json")
except ValueError:
print("ERROR: Invalid vocab")
# CHECK: ERROR: Invalid vocab
@@ -54,7 +54,7 @@
f.write("{ this is not valid json }")
bad_vocab = f.name
try:
- ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=bad_vocab)
+ ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=bad_vocab)
except ValueError:
print("ERROR: Invalid vocab file")
finally:
diff --git a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
index 2f885b11519c7..d76510f15b081 100644
--- a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
+++ b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
@@ -46,15 +46,9 @@ class PyIR2VecTool {
IR2VecKind OutputEmbeddingMode;
public:
- PyIR2VecTool(const std::string &Filename, const std::string &Mode,
+ PyIR2VecTool(const std::string &Filename, IR2VecKind Mode,
const std::string &VocabPath) {
- OutputEmbeddingMode = [](const std::string &Mode) -> IR2VecKind {
- if (Mode == "sym")
- return IR2VecKind::Symbolic;
- if (Mode == "fa")
- return IR2VecKind::FlowAware;
- throw nb::value_error("Invalid mode. Use 'sym' or 'fa'");
- }(Mode);
+ OutputEmbeddingMode = Mode;
if (VocabPath.empty())
throw nb::value_error("Empty Vocab Path not allowed");
@@ -200,8 +194,15 @@ class PyIR2VecTool {
NB_MODULE(ir2vec, m) {
m.doc() = std::string("Python bindings for ") + ToolName;
+ nb::enum_<IR2VecKind>(m, "IR2VecKind",
+ "Embedding mode for IR2Vec representations")
+ .value("Symbolic", IR2VecKind::Symbolic, "Symbolic encodings only")
+ .value("FlowAware", IR2VecKind::FlowAware,
+ "Flow-aware encodings (includes data/control flow)")
+ .export_values();
+
nb::class_<PyIR2VecTool>(m, "IR2VecTool")
- .def(nb::init<const std::string &, const std::string &,
+ .def(nb::init<const std::string &, IR2VecKind,
const std::string &>(),
nb::arg("filename"), nb::arg("mode"), nb::arg("vocabPath"))
.def("getFuncNames", &PyIR2VecTool::getFuncNames,
@@ -228,10 +229,10 @@ NB_MODULE(ir2vec, m) {
m.def(
"initEmbedding",
- [](const std::string &filename, const std::string &mode,
+ [](const std::string &filename, IR2VecKind mode,
const std::string &vocabPath) {
return std::make_unique<PyIR2VecTool>(filename, mode, vocabPath);
},
- nb::arg("filename"), nb::arg("mode") = "sym", nb::arg("vocabPath"),
+ nb::arg("filename"), nb::arg("mode") = IR2VecKind::Symbolic, nb::arg("vocabPath"),
nb::rv_policy::take_ownership);
}
From 767c997d765e8c5ad0194db3c6df54d460ce2f1e Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Sat, 4 Apr 2026 21:16:26 +0530
Subject: [PATCH 2/2] Nit commit - formatting fixup
---
.../bindings/ir2vec-getBBEmbMap.py | 4 +++-
.../llvm-ir2vec/bindings/ir2vec-getFuncEmb.py | 4 +++-
.../bindings/ir2vec-getFuncEmbMap.py | 4 +++-
.../bindings/ir2vec-getFuncNames.py | 4 +++-
.../bindings/ir2vec-getInstEmbMap.py | 4 +++-
.../bindings/ir2vec-initEmbedding.py | 24 ++++++++++++++-----
llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp | 11 ++++-----
7 files changed, 38 insertions(+), 17 deletions(-)
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
index a85876c7d8e9f..57fe1b032c6fd 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
@@ -6,7 +6,9 @@
ll_file = sys.argv[1]
vocab_path = sys.argv[2]
-tool = ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
+tool = ir2vec.initEmbedding(
+ filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path
+)
# Success case
bb_map = tool.getBBEmbMap("conditional")
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
index 782cbb4f65ae3..bb58b59e54825 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
@@ -6,7 +6,9 @@
ll_file = sys.argv[1]
vocab_path = sys.argv[2]
-tool = ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
+tool = ir2vec.initEmbedding(
+ filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path
+)
# Success case
emb = tool.getFuncEmb("add")
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
index 4235ed079c407..03c7baab4d349 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
@@ -6,7 +6,9 @@
ll_file = sys.argv[1]
vocab_path = sys.argv[2]
-tool = ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
+tool = ir2vec.initEmbedding(
+ filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path
+)
# Success case
emb_map = tool.getFuncEmbMap()
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
index 3e3048ead6e5f..4bb19e9bd8115 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
@@ -6,7 +6,9 @@
ll_file = sys.argv[1]
vocab_path = sys.argv[2]
-tool = ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
+tool = ir2vec.initEmbedding(
+ filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path
+)
# Success case
func_names = tool.getFuncNames()
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
index f9bd4ffcc9e5d..b04222ce4943b 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
@@ -6,7 +6,9 @@
ll_file = sys.argv[1]
vocab_path = sys.argv[2]
-tool = ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
+tool = ir2vec.initEmbedding(
+ filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path
+)
# Success case
inst_map = tool.getInstEmbMap("add")
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
index 43c3db5d98c03..dcdcd90da0847 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
@@ -7,7 +7,9 @@
vocab_path = sys.argv[2]
# Success case
-tool = ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
+tool = ir2vec.initEmbedding(
+ filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path
+)
print(f"SUCCESS: {type(tool).__name__}")
# CHECK: SUCCESS: IR2VecTool
@@ -20,28 +22,36 @@
# Error: Empty vocab path
try:
- ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath="")
+ ir2vec.initEmbedding(
+ filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=""
+ )
except ValueError:
print("ERROR: Empty vocab path")
# CHECK: ERROR: Empty vocab path
# Error: Invalid file
try:
- ir2vec.initEmbedding(filename="/bad.ll", mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
+ ir2vec.initEmbedding(
+ filename="/bad.ll", mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path
+ )
except ValueError:
print("ERROR: Invalid file")
# CHECK: ERROR: Invalid file
# Error: Empty filename
try:
- ir2vec.initEmbedding(filename="", mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path)
+ ir2vec.initEmbedding(
+ filename="", mode=ir2vec.IR2VecKind.Symbolic, vocabPath=vocab_path
+ )
except ValueError:
print("ERROR: Empty filename")
# CHECK: ERROR: Empty filename
# Error: Invalid vocab file
try:
- ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath="/bad.json")
+ ir2vec.initEmbedding(
+ filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath="/bad.json"
+ )
except ValueError:
print("ERROR: Invalid vocab")
# CHECK: ERROR: Invalid vocab
@@ -54,7 +64,9 @@
f.write("{ this is not valid json }")
bad_vocab = f.name
try:
- ir2vec.initEmbedding(filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=bad_vocab)
+ ir2vec.initEmbedding(
+ filename=ll_file, mode=ir2vec.IR2VecKind.Symbolic, vocabPath=bad_vocab
+ )
except ValueError:
print("ERROR: Invalid vocab file")
finally:
diff --git a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
index d76510f15b081..4daa383db0bce 100644
--- a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
+++ b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
@@ -195,15 +195,14 @@ NB_MODULE(ir2vec, m) {
m.doc() = std::string("Python bindings for ") + ToolName;
nb::enum_<IR2VecKind>(m, "IR2VecKind",
- "Embedding mode for IR2Vec representations")
+ "Embedding mode for IR2Vec representations")
.value("Symbolic", IR2VecKind::Symbolic, "Symbolic encodings only")
.value("FlowAware", IR2VecKind::FlowAware,
- "Flow-aware encodings (includes data/control flow)")
+ "Flow-aware encodings (includes data/control flow)")
.export_values();
nb::class_<PyIR2VecTool>(m, "IR2VecTool")
- .def(nb::init<const std::string &, IR2VecKind,
- const std::string &>(),
+ .def(nb::init<const std::string &, IR2VecKind, const std::string &>(),
nb::arg("filename"), nb::arg("mode"), nb::arg("vocabPath"))
.def("getFuncNames", &PyIR2VecTool::getFuncNames,
"Get list of all defined functions in the module\n"
@@ -233,6 +232,6 @@ NB_MODULE(ir2vec, m) {
const std::string &vocabPath) {
return std::make_unique<PyIR2VecTool>(filename, mode, vocabPath);
},
- nb::arg("filename"), nb::arg("mode") = IR2VecKind::Symbolic, nb::arg("vocabPath"),
- nb::rv_policy::take_ownership);
+ nb::arg("filename"), nb::arg("mode") = IR2VecKind::Symbolic,
+ nb::arg("vocabPath"), nb::rv_policy::take_ownership);
}
More information about the llvm-commits mailing list