[llvm] [llvm-ir2vec] Refactoring the ir2vec python bindings testing (PR #180664)
Nishant Sachdeva via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 5 09:50:37 PST 2026
https://github.com/nishant-sachdeva updated https://github.com/llvm/llvm-project/pull/180664
>From 8790834286fabc1a17b4a013172f2eb66a1aef5a Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Mon, 9 Feb 2026 12:27:13 +0530
Subject: [PATCH 1/2] Adding getFuncNames API to ir2vec python bindings
---
.../tools/llvm-ir2vec/bindings/ir2vec-bindings.py | 10 ++++++++++
llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp | 11 +++++++++++
2 files changed, 21 insertions(+)
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
index bb29d33dc8ca6..c9c241ccf90c0 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
@@ -12,6 +12,12 @@
print("SUCCESS: Tool initialized")
print(f"Tool type: {type(tool).__name__}")
+ # Test getFuncNames
+ print("\n=== Function Names ===")
+ func_names = tool.getFuncNames()
+ for func_name in sorted(func_names):
+ print(f"Function: {func_name}")
+
# Test getFuncEmbMap
print("\n=== Function Embeddings ===")
func_emb_map = tool.getFuncEmbMap()
@@ -57,6 +63,10 @@
# CHECK: SUCCESS: Tool initialized
# CHECK: Tool type: IR2VecTool
+# CHECK: === Function Names ===
+# CHECK: Function: add
+# CHECK: Function: conditional
+# CHECK: Function: multiply
# CHECK: === Function Embeddings ===
# CHECK: Function: add
# CHECK-NEXT: Embedding: [38.0, 40.0, 42.0]
diff --git a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
index e4ddaf9c14e5a..df372aedb9b63 100644
--- a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
+++ b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
@@ -70,6 +70,14 @@ class PyIR2VecTool {
}
}
+ /// Returns the names of all functions defined in the module as list[str].
+ nb::list getFuncNames() {
+ nb::list NbFuncNames;
+ for (const Function &F : M->getFunctionDefs())
+ NbFuncNames.append(nb::str(F.getName().data(), F.getName().size()));
+ return NbFuncNames;
+ }
+
nb::dict getFuncEmbMap() {
auto ToolFuncEmbMap = Tool->getFunctionEmbeddingsMap(OutputEmbeddingMode);
@@ -196,6 +204,9 @@ NB_MODULE(ir2vec, m) {
.def(nb::init<const std::string &, const std::string &,
const std::string &>(),
nb::arg("filename"), nb::arg("mode"), nb::arg("vocabPath"))
+ .def("getFuncNames", &PyIR2VecTool::getFuncNames,
+ "Get list of all defined functions in the module\n"
+ "Returns: list[str] - Function names")
.def("getFuncEmbMap", &PyIR2VecTool::getFuncEmbMap,
"Generate function-level embeddings for all functions\n"
"Returns: dict[str, ndarray[float64]] - "
>From f6886efad108cd8ac2df97c1f8ad57fbb0055690 Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Mon, 9 Feb 2026 19:42:23 +0530
Subject: [PATCH 2/2] Refactoring the ir2vec python bindings testing to make it
more modular and thorough
---
.../llvm-ir2vec/bindings/ir2vec-bindings.py | 125 ------------------
.../llvm-ir2vec/bindings/ir2vec-exceptions.py | 38 ------
.../bindings/ir2vec-getBBEmbMap.py | 25 ++++
.../llvm-ir2vec/bindings/ir2vec-getFuncEmb.py | 21 +++
.../bindings/ir2vec-getFuncEmbMap.py | 18 +++
.../bindings/ir2vec-getFuncNames.py | 18 +++
.../bindings/ir2vec-getInstEmbMap.py | 27 ++++
.../bindings/ir2vec-initEmbedding.py | 54 ++++++++
8 files changed, 163 insertions(+), 163 deletions(-)
delete mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
delete mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
deleted file mode 100644
index c9c241ccf90c0..0000000000000
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
-
-import sys
-import ir2vec
-
-ll_file = sys.argv[1]
-vocab_path = sys.argv[2]
-
-tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
-
-if tool is not None:
- print("SUCCESS: Tool initialized")
- print(f"Tool type: {type(tool).__name__}")
-
- # Test getFuncNames
- print("\n=== Function Names ===")
- func_names = tool.getFuncNames()
- for func_name in sorted(func_names):
- print(f"Function: {func_name}")
-
- # Test getFuncEmbMap
- print("\n=== Function Embeddings ===")
- func_emb_map = tool.getFuncEmbMap()
-
- # Sorting the function names for fixed-ordered output
- for func_name in sorted(func_emb_map.keys()):
- emb = func_emb_map[func_name]
- print(f"Function: {func_name}")
- print(f" Embedding: {emb.tolist()}")
-
- # Test getFuncEmb for individual functions
- print("\n=== Single Function Embeddings ===")
-
- # Test valid function names
- for func_name in ["add", "multiply", "conditional"]:
- func_emb = tool.getFuncEmb(func_name)
- print(f"Function: {func_name}")
- print(f" Embedding: {func_emb.tolist()}")
-
- # Test getBBEmbMap
- print("\n=== Basic Block Embeddings ===")
-
- # Test valid function names in sorted order
- for func_name in sorted(["add", "multiply", "conditional"]):
- bb_emb_map = tool.getBBEmbMap(func_name)
- print(f"Function: {func_name}")
- for bb_name in sorted(bb_emb_map.keys()):
- emb = bb_emb_map[bb_name]
- print(f" BB: {bb_name}")
- print(f" Embedding: {emb.tolist()}")
-
- # Test getInstEmbMap
- print("\n=== Instruction Embeddings ===")
-
- # Test valid function names in sorted order
- for func_name in sorted(["add", "multiply", "conditional"]):
- inst_emb_map = tool.getInstEmbMap(func_name)
- print(f"Function: {func_name}")
- for inst_str in sorted(inst_emb_map.keys()):
- emb = inst_emb_map[inst_str]
- print(f" Inst: {inst_str}")
- print(f" Embedding: {emb.tolist()}")
-
-# CHECK: SUCCESS: Tool initialized
-# CHECK: Tool type: IR2VecTool
-# CHECK: === Function Names ===
-# CHECK: Function: add
-# CHECK: Function: conditional
-# CHECK: Function: multiply
-# CHECK: === Function Embeddings ===
-# CHECK: Function: add
-# CHECK-NEXT: Embedding: [38.0, 40.0, 42.0]
-# CHECK: Function: conditional
-# CHECK-NEXT: Embedding: [413.20000000298023, 421.20000000298023, 429.20000000298023]
-# CHECK: Function: multiply
-# CHECK-NEXT: Embedding: [50.0, 52.0, 54.0]
-# CHECK: === Single Function Embeddings ===
-# CHECK: Function: add
-# CHECK-NEXT: Embedding: [38.0, 40.0, 42.0]
-# CHECK: Function: multiply
-# CHECK-NEXT: Embedding: [50.0, 52.0, 54.0]
-# CHECK: Function: conditional
-# CHECK-NEXT: Embedding: [413.20000000298023, 421.20000000298023, 429.20000000298023]
-# CHECK: === Basic Block Embeddings ===
-# CHECK: Function: add
-# CHECK: BB: entry
-# CHECK-NEXT: Embedding: [38.0, 40.0, 42.0]
-# CHECK: Function: conditional
-# CHECK: BB: entry
-# CHECK-NEXT: Embedding: [161.20000000298023, 163.20000000298023, 165.20000000298023]
-# CHECK: BB: exit
-# CHECK-NEXT: Embedding: [164.0, 166.0, 168.0]
-# CHECK: BB: negative
-# CHECK-NEXT: Embedding: [47.0, 49.0, 51.0]
-# CHECK: BB: positive
-# CHECK-NEXT: Embedding: [41.0, 43.0, 45.0]
-# CHECK: Function: multiply
-# CHECK: BB: entry
-# CHECK-NEXT: Embedding: [50.0, 52.0, 54.0]
-# CHECK: === Instruction Embeddings ===
-# CHECK: Function: add
-# CHECK: Inst: %sum = add i32 %a, %b
-# CHECK-NEXT: Embedding: [37.0, 38.0, 39.0]
-# CHECK: Inst: ret i32 %sum
-# CHECK-NEXT: Embedding: [1.0, 2.0, 3.0]
-# CHECK: Function: conditional
-# CHECK: Inst: %cmp = icmp sgt i32 %n, 0
-# CHECK-NEXT: Embedding: [157.20000000298023, 158.20000000298023, 159.20000000298023]
-# CHECK: Inst: %neg_val = sub i32 %n, 10
-# CHECK-NEXT: Embedding: [43.0, 44.0, 45.0]
-# CHECK: Inst: %pos_val = add i32 %n, 10
-# CHECK-NEXT: Embedding: [37.0, 38.0, 39.0]
-# CHECK: Inst: %result = phi i32 [ %pos_val, %positive ], [ %neg_val, %negative ]
-# CHECK-NEXT: Embedding: [163.0, 164.0, 165.0]
-# CHECK: Inst: br i1 %cmp, label %positive, label %negative
-# CHECK-NEXT: Embedding: [4.0, 5.0, 6.0]
-# CHECK: Inst: br label %exit
-# CHECK-NEXT: Embedding: [4.0, 5.0, 6.0]
-# CHECK: Inst: ret i32 %result
-# CHECK-NEXT: Embedding: [1.0, 2.0, 3.0]
-# CHECK: Function: multiply
-# CHECK: Inst: %prod = mul i32 %x, %y
-# CHECK-NEXT: Embedding: [49.0, 50.0, 51.0]
-# CHECK: Inst: ret i32 %prod
-# CHECK-NEXT: Embedding: [1.0, 2.0, 3.0]
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
deleted file mode 100644
index af96be07c2364..0000000000000
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# RUN: env PYTHONPATH=%llvm_lib_dir %python %s | FileCheck %s
-
-import ir2vec
-
-
-def test_invalid_file():
- """Test that invalid file path raises ValueError"""
- try:
- tool = ir2vec.initEmbedding(
- filename="/this/does/not/exist.ll",
- mode="sym",
- vocabPath="/also/fake/vocab.json",
- )
- return "FAIL: No exception raised"
- except ValueError as e:
- return f"PASS: ValueError raised - {str(e)[:40]}"
- except Exception as e:
- return f"FAIL: Wrong exception - {type(e).__name__}"
-
-
-def test_empty_filename():
- """Test that empty filename raises ValueError"""
- try:
- tool = ir2vec.initEmbedding(filename="", mode="sym", vocabPath="dummy.json")
- return "FAIL: No exception raised"
- except ValueError:
- return "PASS: ValueError raised for empty filename"
- except Exception as e:
- return f"FAIL: Wrong exception - {type(e).__name__}"
-
-
-result1 = test_invalid_file()
-print(f"Test 1: {result1}")
-# CHECK: Test 1: PASS: ValueError raised
-
-result2 = test_empty_filename()
-print(f"Test 2: {result2}")
-# CHECK: Test 2: PASS: ValueError raised
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
new file mode 100644
index 0000000000000..415046a391dc7
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
@@ -0,0 +1,25 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ir_path = sys.argv[1]
+vocab_json = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ir_path, mode="sym", vocabPath=vocab_json)
+
+# Per-basic-block embeddings of "conditional", printed in sorted block order.
+bb_embeddings = tool.getBBEmbMap("conditional")
+for bb_name in sorted(bb_embeddings.keys()):
+ print(f"BB: {bb_name}, EMB: {bb_embeddings[bb_name].tolist()}")
+# CHECK: BB: entry, EMB: [161.20000000298023, 163.20000000298023, 165.20000000298023]
+# CHECK: BB: exit, EMB: [164.0, 166.0, 168.0]
+# CHECK: BB: negative, EMB: [47.0, 49.0, 51.0]
+# CHECK: BB: positive, EMB: [41.0, 43.0, 45.0]
+
+# An unknown function name is expected to raise ValueError.
+try:
+ tool.getBBEmbMap("nonexistent")
+except ValueError:
+ print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
new file mode 100644
index 0000000000000..9f72870408837
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
@@ -0,0 +1,21 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ir_path = sys.argv[1]
+vocab_json = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ir_path, mode="sym", vocabPath=vocab_json)
+
+# Embedding of a single function that exists in the input module.
+add_embedding = tool.getFuncEmb("add").tolist()
+print(f"SUCCESS: {add_embedding}")
+# CHECK: SUCCESS: [38.0, 40.0, 42.0]
+
+# An unknown function name is expected to raise ValueError.
+try:
+ tool.getFuncEmb("nonexistent")
+except ValueError:
+ print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
new file mode 100644
index 0000000000000..a306a652ac9bd
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
@@ -0,0 +1,18 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ir_path = sys.argv[1]
+vocab_json = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ir_path, mode="sym", vocabPath=vocab_json)
+
+# Function-level embeddings, printed in sorted name order for stable output.
+func_embeddings = tool.getFuncEmbMap()
+for func_name in sorted(func_embeddings):
+ print(f"FUNC: {func_name}, EMB: {func_embeddings[func_name].tolist()}")
+
+# CHECK: FUNC: add, EMB: [38.0, 40.0, 42.0]
+# CHECK: FUNC: conditional, EMB: [413.20000000298023, 421.20000000298023, 429.20000000298023]
+# CHECK: FUNC: multiply, EMB: [50.0, 52.0, 54.0]
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
new file mode 100644
index 0000000000000..b121d24ba896f
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
@@ -0,0 +1,18 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ir_path = sys.argv[1]
+vocab_json = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ir_path, mode="sym", vocabPath=vocab_json)
+
+# Function names from getFuncNames, sorted so the output order is stable.
+sorted_names = sorted(tool.getFuncNames())
+for func_name in sorted_names:
+ print(f"FUNC: {func_name}")
+
+# CHECK: FUNC: add
+# CHECK: FUNC: conditional
+# CHECK: FUNC: multiply
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
new file mode 100644
index 0000000000000..1af41a803e551
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
@@ -0,0 +1,27 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case: each instruction is printed with its embedding on the next line
+inst_map = tool.getInstEmbMap("add")
+for inst in sorted(inst_map.keys()):
+ print(f"INST: {inst}")
+ print(f" EMB: {inst_map[inst].tolist()}")
+
+# CHECK: INST: %sum = add i32 %a, %b
+# CHECK-NEXT: EMB: [37.0, 38.0, 39.0]
+# CHECK: INST: ret i32 %sum
+# CHECK-NEXT: EMB: [1.0, 2.0, 3.0]
+
+# Error: Function not found (expects ValueError)
+try:
+ tool.getInstEmbMap("nonexistent")
+except ValueError:
+ print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
new file mode 100644
index 0000000000000..f35c118a3c3d1
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
@@ -0,0 +1,54 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import os
+import sys
+import tempfile
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+# Success case
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+print(f"SUCCESS: {type(tool).__name__}")
+# CHECK: SUCCESS: IR2VecTool
+
+# Error: Invalid mode
+try:
+ ir2vec.initEmbedding(filename=ll_file, mode="invalid", vocabPath=vocab_path)
+except ValueError:
+ print("ERROR: Invalid mode")
+# CHECK: ERROR: Invalid mode
+
+# Error: Empty vocab path
+try:
+ ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath="")
+except ValueError:
+ print("ERROR: Empty vocab path")
+# CHECK: ERROR: Empty vocab path
+
+# Error: Invalid file
+try:
+ ir2vec.initEmbedding(filename="/bad.ll", mode="sym", vocabPath=vocab_path)
+except ValueError:
+ print("ERROR: Invalid file")
+# CHECK: ERROR: Invalid file
+
+# Error: Invalid vocab file
+try:
+ ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath="/bad.json")
+except ValueError:
+ print("ERROR: Invalid vocab")
+# CHECK: ERROR: Invalid vocab
+
+# Error: Malformed JSON vocab (the temporary file is removed in the finally clause)
+with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+ f.write("{ this is not valid json }")
+ bad_vocab = f.name
+try:
+ ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=bad_vocab)
+except ValueError:
+ print("ERROR: Malformed vocab")
+finally:
+ os.unlink(bad_vocab)
+# CHECK: ERROR: Malformed vocab
\ No newline at end of file
More information about the llvm-commits
mailing list