[llvm] [llvm-ir2vec] Refactoring the ir2vec python bindings testing (PR #180664)
Nishant Sachdeva via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 5 20:47:27 PST 2026
https://github.com/nishant-sachdeva updated https://github.com/llvm/llvm-project/pull/180664
>From 9dca89de1b9ff5c0b3044976179c9839a52344db Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Mon, 9 Feb 2026 12:27:13 +0530
Subject: [PATCH 1/3] Adding getFuncNames API to ir2vec python bindings
---
.../tools/llvm-ir2vec/bindings/ir2vec-bindings.py | 10 ++++++++++
llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp | 11 +++++++++++
2 files changed, 21 insertions(+)
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
index bb29d33dc8ca6..c9c241ccf90c0 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
@@ -12,6 +12,12 @@
print("SUCCESS: Tool initialized")
print(f"Tool type: {type(tool).__name__}")
+ # Test getFuncNames
+ print("\n=== Function Names ===")
+ func_names = tool.getFuncNames()
+ for func_name in sorted(func_names):
+ print(f"Function: {func_name}")
+
# Test getFuncEmbMap
print("\n=== Function Embeddings ===")
func_emb_map = tool.getFuncEmbMap()
@@ -57,6 +63,10 @@
# CHECK: SUCCESS: Tool initialized
# CHECK: Tool type: IR2VecTool
+# CHECK: === Function Names ===
+# CHECK: Function: add
+# CHECK: Function: conditional
+# CHECK: Function: multiply
# CHECK: === Function Embeddings ===
# CHECK: Function: add
# CHECK-NEXT: Embedding: [38.0, 40.0, 42.0]
diff --git a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
index e4ddaf9c14e5a..df372aedb9b63 100644
--- a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
+++ b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
@@ -70,6 +70,14 @@ class PyIR2VecTool {
}
}
+ nb::list getFuncNames() {
+ nb::list NbFuncNames;
+ for (const Function &F : M->getFunctionDefs()) {
+ NbFuncNames.append(nb::str(F.getName().str().c_str()));
+ }
+ return NbFuncNames;
+ }
+
nb::dict getFuncEmbMap() {
auto ToolFuncEmbMap = Tool->getFunctionEmbeddingsMap(OutputEmbeddingMode);
@@ -196,6 +204,9 @@ NB_MODULE(ir2vec, m) {
.def(nb::init<const std::string &, const std::string &,
const std::string &>(),
nb::arg("filename"), nb::arg("mode"), nb::arg("vocabPath"))
+ .def("getFuncNames", &PyIR2VecTool::getFuncNames,
+ "Get list of all defined functions in the module\n"
+ "Returns: list[str] - Function names")
.def("getFuncEmbMap", &PyIR2VecTool::getFuncEmbMap,
"Generate function-level embeddings for all functions\n"
"Returns: dict[str, ndarray[float64]] - "
>From 4705b1d9bc8d290f437d95efbcb8e7519740087d Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Fri, 6 Mar 2026 10:15:19 +0530
Subject: [PATCH 2/3] Adding a function declaration to input test to ensure only definitions are listed, and other nits
---
llvm/test/tools/llvm-ir2vec/Inputs/input.ll | 3 +++
llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py | 1 +
llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp | 4 ++--
3 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/llvm/test/tools/llvm-ir2vec/Inputs/input.ll b/llvm/test/tools/llvm-ir2vec/Inputs/input.ll
index 93e77be51b8e9..c33d6e9ee7678 100644
--- a/llvm/test/tools/llvm-ir2vec/Inputs/input.ll
+++ b/llvm/test/tools/llvm-ir2vec/Inputs/input.ll
@@ -1,3 +1,6 @@
+; Function declaration - should be excluded from all IR2Vec outputs
+declare i32 @external_func(i32 %x)
+
define i32 @add(i32 %a, i32 %b) {
entry:
%sum = add i32 %a, %b
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
index c9c241ccf90c0..d3a1cdd6591ad 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
@@ -67,6 +67,7 @@
# CHECK: Function: add
# CHECK: Function: conditional
# CHECK: Function: multiply
+# CHECK-NOT: Function: external_func
# CHECK: === Function Embeddings ===
# CHECK: Function: add
# CHECK-NEXT: Embedding: [38.0, 40.0, 42.0]
diff --git a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
index df372aedb9b63..2f885b11519c7 100644
--- a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
+++ b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
@@ -72,9 +72,9 @@ class PyIR2VecTool {
nb::list getFuncNames() {
nb::list NbFuncNames;
- for (const Function &F : M->getFunctionDefs()) {
+ for (const Function &F : M->getFunctionDefs())
NbFuncNames.append(nb::str(F.getName().str().c_str()));
- }
+
return NbFuncNames;
}
>From 46849acf790e3b82ac28d5799fa3bdf9aec14bb4 Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Mon, 9 Feb 2026 19:42:23 +0530
Subject: [PATCH 3/3] Refactoring the ir2vec python bindings testing to make it more modular and thorough
---
.../llvm-ir2vec/bindings/ir2vec-exceptions.py | 38 -------------
.../bindings/ir2vec-getBBEmbMap.py | 25 +++++++++
.../llvm-ir2vec/bindings/ir2vec-getFuncEmb.py | 21 ++++++++
.../bindings/ir2vec-getFuncEmbMap.py | 18 +++++++
.../bindings/ir2vec-getFuncNames.py | 18 +++++++
.../bindings/ir2vec-getInstEmbMap.py | 27 ++++++++++
.../bindings/ir2vec-initEmbedding.py | 54 +++++++++++++++++++
7 files changed, 163 insertions(+), 38 deletions(-)
delete mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
deleted file mode 100644
index af96be07c2364..0000000000000
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# RUN: env PYTHONPATH=%llvm_lib_dir %python %s | FileCheck %s
-
-import ir2vec
-
-
-def test_invalid_file():
- """Test that invalid file path raises ValueError"""
- try:
- tool = ir2vec.initEmbedding(
- filename="/this/does/not/exist.ll",
- mode="sym",
- vocabPath="/also/fake/vocab.json",
- )
- return "FAIL: No exception raised"
- except ValueError as e:
- return f"PASS: ValueError raised - {str(e)[:40]}"
- except Exception as e:
- return f"FAIL: Wrong exception - {type(e).__name__}"
-
-
-def test_empty_filename():
- """Test that empty filename raises ValueError"""
- try:
- tool = ir2vec.initEmbedding(filename="", mode="sym", vocabPath="dummy.json")
- return "FAIL: No exception raised"
- except ValueError:
- return "PASS: ValueError raised for empty filename"
- except Exception as e:
- return f"FAIL: Wrong exception - {type(e).__name__}"
-
-
-result1 = test_invalid_file()
-print(f"Test 1: {result1}")
-# CHECK: Test 1: PASS: ValueError raised
-
-result2 = test_empty_filename()
-print(f"Test 2: {result2}")
-# CHECK: Test 2: PASS: ValueError raised
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
new file mode 100644
index 0000000000000..415046a391dc7
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
@@ -0,0 +1,25 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+bb_map = tool.getBBEmbMap("conditional")
+for bb in sorted(bb_map.keys()):
+ print(f"BB: {bb}, EMB: {bb_map[bb].tolist()}")
+# CHECK: BB: entry, EMB: [161.20000000298023, 163.20000000298023, 165.20000000298023]
+# CHECK: BB: exit, EMB: [164.0, 166.0, 168.0]
+# CHECK: BB: negative, EMB: [47.0, 49.0, 51.0]
+# CHECK: BB: positive, EMB: [41.0, 43.0, 45.0]
+
+# Error: Function not found
+try:
+ tool.getBBEmbMap("nonexistent")
+except ValueError:
+ print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
new file mode 100644
index 0000000000000..9f72870408837
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
@@ -0,0 +1,21 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+emb = tool.getFuncEmb("add")
+print(f"SUCCESS: {emb.tolist()}")
+# CHECK: SUCCESS: [38.0, 40.0, 42.0]
+
+# Error: Function not found
+try:
+ tool.getFuncEmb("nonexistent")
+except ValueError:
+ print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
new file mode 100644
index 0000000000000..a306a652ac9bd
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
@@ -0,0 +1,18 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+emb_map = tool.getFuncEmbMap()
+for name in sorted(emb_map.keys()):
+ print(f"FUNC: {name}, EMB: {emb_map[name].tolist()}")
+
+# CHECK: FUNC: add, EMB: [38.0, 40.0, 42.0]
+# CHECK: FUNC: conditional, EMB: [413.20000000298023, 421.20000000298023, 429.20000000298023]
+# CHECK: FUNC: multiply, EMB: [50.0, 52.0, 54.0]
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
new file mode 100644
index 0000000000000..b121d24ba896f
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
@@ -0,0 +1,18 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+func_names = tool.getFuncNames()
+for name in sorted(func_names):
+ print(f"FUNC: {name}")
+
+# CHECK: FUNC: add
+# CHECK: FUNC: conditional
+# CHECK: FUNC: multiply
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
new file mode 100644
index 0000000000000..1af41a803e551
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
@@ -0,0 +1,27 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+inst_map = tool.getInstEmbMap("add")
+for inst in sorted(inst_map.keys()):
+ print(f"INST: {inst}")
+ print(f" EMB: {inst_map[inst].tolist()}")
+
+# CHECK: INST: %sum = add i32 %a, %b
+# CHECK: EMB: [37.0, 38.0, 39.0]
+# CHECK: INST: ret i32 %sum
+# CHECK: EMB: [1.0, 2.0, 3.0]
+
+# Error: Function not found
+try:
+ tool.getInstEmbMap("nonexistent")
+except ValueError:
+ print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
new file mode 100644
index 0000000000000..f35c118a3c3d1
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
@@ -0,0 +1,54 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+# Success case
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+print(f"SUCCESS: {type(tool).__name__}")
+# CHECK: SUCCESS: IR2VecTool
+
+# Error: Invalid mode
+try:
+ ir2vec.initEmbedding(filename=ll_file, mode="invalid", vocabPath=vocab_path)
+except ValueError:
+ print("ERROR: Invalid mode")
+# CHECK: ERROR: Invalid mode
+
+# Error: Empty vocab path
+try:
+ ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath="")
+except ValueError:
+ print("ERROR: Empty vocab path")
+# CHECK: ERROR: Empty vocab path
+
+# Error: Invalid file
+try:
+ ir2vec.initEmbedding(filename="/bad.ll", mode="sym", vocabPath=vocab_path)
+except ValueError:
+ print("ERROR: Invalid file")
+# CHECK: ERROR: Invalid file
+
+# Error: Invalid vocab file
+try:
+ ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath="/bad.json")
+except ValueError:
+ print("ERROR: Invalid vocab")
+# CHECK: ERROR: Invalid vocab
+
+# Error: Malformed JSON vocab
+import tempfile
+import os
+with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+ f.write("{ this is not valid json }")
+ bad_vocab = f.name
+try:
+ ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=bad_vocab)
+except ValueError:
+ print("ERROR: Invalid vocab file")
+finally:
+ os.unlink(bad_vocab)
+# CHECK: ERROR: Invalid vocab file
\ No newline at end of file
More information about the llvm-commits mailing list