[llvm] [llvm-ir2vec] Refactoring the ir2vec python bindings testing (PR #180664)

Nishant Sachdeva via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 5 20:47:27 PST 2026


https://github.com/nishant-sachdeva updated https://github.com/llvm/llvm-project/pull/180664

>From 9dca89de1b9ff5c0b3044976179c9839a52344db Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Mon, 9 Feb 2026 12:27:13 +0530
Subject: [PATCH 1/3] Adding getFuncNames API to ir2vec python bindings

---
 .../tools/llvm-ir2vec/bindings/ir2vec-bindings.py     | 10 ++++++++++
 llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp          | 11 +++++++++++
 2 files changed, 21 insertions(+)

diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
index bb29d33dc8ca6..c9c241ccf90c0 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
@@ -12,6 +12,12 @@
     print("SUCCESS: Tool initialized")
     print(f"Tool type: {type(tool).__name__}")
 
+    # Test getFuncNames
+    print("\n=== Function Names ===")
+    func_names = tool.getFuncNames()
+    for func_name in sorted(func_names):
+        print(f"Function: {func_name}")
+
     # Test getFuncEmbMap
     print("\n=== Function Embeddings ===")
     func_emb_map = tool.getFuncEmbMap()
@@ -57,6 +63,10 @@
 
 # CHECK: SUCCESS: Tool initialized
 # CHECK: Tool type: IR2VecTool
+# CHECK: === Function Names ===
+# CHECK: Function: add
+# CHECK: Function: conditional
+# CHECK: Function: multiply
 # CHECK: === Function Embeddings ===
 # CHECK: Function: add
 # CHECK-NEXT:   Embedding: [38.0, 40.0, 42.0]
diff --git a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
index e4ddaf9c14e5a..df372aedb9b63 100644
--- a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
+++ b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
@@ -70,6 +70,14 @@ class PyIR2VecTool {
     }
   }
 
+  nb::list getFuncNames() {
+    nb::list NbFuncNames;
+    for (const Function &F : M->getFunctionDefs()) {
+      NbFuncNames.append(nb::str(F.getName().str().c_str()));
+    }
+    return NbFuncNames;
+  }
+
   nb::dict getFuncEmbMap() {
     auto ToolFuncEmbMap = Tool->getFunctionEmbeddingsMap(OutputEmbeddingMode);
 
@@ -196,6 +204,9 @@ NB_MODULE(ir2vec, m) {
       .def(nb::init<const std::string &, const std::string &,
                     const std::string &>(),
            nb::arg("filename"), nb::arg("mode"), nb::arg("vocabPath"))
+      .def("getFuncNames", &PyIR2VecTool::getFuncNames,
+           "Get list of all defined functions in the module\n"
+           "Returns: list[str] - Function names")
       .def("getFuncEmbMap", &PyIR2VecTool::getFuncEmbMap,
            "Generate function-level embeddings for all functions\n"
            "Returns: dict[str, ndarray[float64]] - "

>From 4705b1d9bc8d290f437d95efbcb8e7519740087d Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Fri, 6 Mar 2026 10:15:19 +0530
Subject: [PATCH 2/3] Adding a function declaration to input test to ensure
 only definitions are listed, and other nits

---
 llvm/test/tools/llvm-ir2vec/Inputs/input.ll             | 3 +++
 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py | 1 +
 llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp            | 4 ++--
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/llvm/test/tools/llvm-ir2vec/Inputs/input.ll b/llvm/test/tools/llvm-ir2vec/Inputs/input.ll
index 93e77be51b8e9..c33d6e9ee7678 100644
--- a/llvm/test/tools/llvm-ir2vec/Inputs/input.ll
+++ b/llvm/test/tools/llvm-ir2vec/Inputs/input.ll
@@ -1,3 +1,6 @@
+; Function declaration - should be excluded from all IR2Vec outputs
+declare i32 @external_func(i32 %x)
+
 define i32 @add(i32 %a, i32 %b) {
 entry:
   %sum = add i32 %a, %b
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
index c9c241ccf90c0..d3a1cdd6591ad 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
@@ -67,6 +67,7 @@
 # CHECK: Function: add
 # CHECK: Function: conditional
+# CHECK-NOT: Function: external_func
 # CHECK: Function: multiply
 # CHECK: === Function Embeddings ===
 # CHECK: Function: add
 # CHECK-NEXT:   Embedding: [38.0, 40.0, 42.0]
diff --git a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
index df372aedb9b63..2f885b11519c7 100644
--- a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
+++ b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
@@ -72,9 +72,9 @@ class PyIR2VecTool {
 
   nb::list getFuncNames() {
     nb::list NbFuncNames;
-    for (const Function &F : M->getFunctionDefs()) {
+    for (const Function &F : M->getFunctionDefs())
       NbFuncNames.append(nb::str(F.getName().str().c_str()));
-    }
+
     return NbFuncNames;
   }
 

>From 46849acf790e3b82ac28d5799fa3bdf9aec14bb4 Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Mon, 9 Feb 2026 19:42:23 +0530
Subject: [PATCH 3/3] Refactoring the ir2vec python bindings testing to make it
 more modular and thorough

---
 .../llvm-ir2vec/bindings/ir2vec-exceptions.py | 38 -------------
 .../bindings/ir2vec-getBBEmbMap.py            | 25 +++++++++
 .../llvm-ir2vec/bindings/ir2vec-getFuncEmb.py | 21 ++++++++
 .../bindings/ir2vec-getFuncEmbMap.py          | 18 +++++++
 .../bindings/ir2vec-getFuncNames.py           | 18 +++++++
 .../bindings/ir2vec-getInstEmbMap.py          | 27 ++++++++++
 .../bindings/ir2vec-initEmbedding.py          | 54 +++++++++++++++++++
 7 files changed, 163 insertions(+), 38 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
 create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
 create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
 create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
 create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
 create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
 create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py

diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
deleted file mode 100644
index af96be07c2364..0000000000000
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# RUN: env PYTHONPATH=%llvm_lib_dir %python %s | FileCheck %s
-
-import ir2vec
-
-
-def test_invalid_file():
-    """Test that invalid file path raises ValueError"""
-    try:
-        tool = ir2vec.initEmbedding(
-            filename="/this/does/not/exist.ll",
-            mode="sym",
-            vocabPath="/also/fake/vocab.json",
-        )
-        return "FAIL: No exception raised"
-    except ValueError as e:
-        return f"PASS: ValueError raised - {str(e)[:40]}"
-    except Exception as e:
-        return f"FAIL: Wrong exception - {type(e).__name__}"
-
-
-def test_empty_filename():
-    """Test that empty filename raises ValueError"""
-    try:
-        tool = ir2vec.initEmbedding(filename="", mode="sym", vocabPath="dummy.json")
-        return "FAIL: No exception raised"
-    except ValueError:
-        return "PASS: ValueError raised for empty filename"
-    except Exception as e:
-        return f"FAIL: Wrong exception - {type(e).__name__}"
-
-
-result1 = test_invalid_file()
-print(f"Test 1: {result1}")
-# CHECK: Test 1: PASS: ValueError raised
-
-result2 = test_empty_filename()
-print(f"Test 2: {result2}")
-# CHECK: Test 2: PASS: ValueError raised
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
new file mode 100644
index 0000000000000..415046a391dc7
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
@@ -0,0 +1,25 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+bb_map = tool.getBBEmbMap("conditional")
+for bb in sorted(bb_map.keys()):
+    print(f"BB: {bb}, EMB: {bb_map[bb].tolist()}")
+# CHECK: BB: entry, EMB: [161.20000000298023, 163.20000000298023, 165.20000000298023]
+# CHECK: BB: exit, EMB: [164.0, 166.0, 168.0]
+# CHECK: BB: negative, EMB: [47.0, 49.0, 51.0]
+# CHECK: BB: positive, EMB: [41.0, 43.0, 45.0]
+
+# Error: Function not found
+try:
+    tool.getBBEmbMap("nonexistent")
+except ValueError:
+    print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
new file mode 100644
index 0000000000000..9f72870408837
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
@@ -0,0 +1,21 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+emb = tool.getFuncEmb("add")
+print(f"SUCCESS: {emb.tolist()}")
+# CHECK: SUCCESS: [38.0, 40.0, 42.0]
+
+# Error: Function not found
+try:
+    tool.getFuncEmb("nonexistent")
+except ValueError:
+    print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
new file mode 100644
index 0000000000000..a306a652ac9bd
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
@@ -0,0 +1,18 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+emb_map = tool.getFuncEmbMap()
+for name in sorted(emb_map.keys()):
+    print(f"FUNC: {name}, EMB: {emb_map[name].tolist()}")
+
+# CHECK: FUNC: add, EMB: [38.0, 40.0, 42.0]
+# CHECK: FUNC: conditional, EMB: [413.20000000298023, 421.20000000298023, 429.20000000298023]
+# CHECK: FUNC: multiply, EMB: [50.0, 52.0, 54.0]
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
new file mode 100644
index 0000000000000..b121d24ba896f
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
@@ -0,0 +1,18 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+func_names = tool.getFuncNames()
+for name in sorted(func_names):
+    print(f"FUNC: {name}")
+
+# CHECK: FUNC: add
+# CHECK-NEXT: FUNC: conditional
+# CHECK-NEXT: FUNC: multiply
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
new file mode 100644
index 0000000000000..1af41a803e551
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
@@ -0,0 +1,27 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+inst_map = tool.getInstEmbMap("add")
+for inst in sorted(inst_map.keys()):
+    print(f"INST: {inst}")
+    print(f"  EMB: {inst_map[inst].tolist()}")
+
+# CHECK: INST: %sum = add i32 %a, %b
+# CHECK:   EMB: [37.0, 38.0, 39.0]
+# CHECK: INST: ret i32 %sum
+# CHECK:   EMB: [1.0, 2.0, 3.0]
+
+# Error: Function not found
+try:
+    tool.getInstEmbMap("nonexistent")
+except ValueError:
+    print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
new file mode 100644
index 0000000000000..f35c118a3c3d1
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
@@ -0,0 +1,54 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+# Success case
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+print(f"SUCCESS: {type(tool).__name__}")
+# CHECK: SUCCESS: IR2VecTool
+
+# Error: Invalid mode
+try:
+    ir2vec.initEmbedding(filename=ll_file, mode="invalid", vocabPath=vocab_path)
+except ValueError:
+    print("ERROR: Invalid mode")
+# CHECK: ERROR: Invalid mode
+
+# Error: Empty vocab path
+try:
+    ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath="")
+except ValueError:
+    print("ERROR: Empty vocab path")
+# CHECK: ERROR: Empty vocab path
+
+# Error: Invalid file
+try:
+    ir2vec.initEmbedding(filename="/bad.ll", mode="sym", vocabPath=vocab_path)
+except ValueError:
+    print("ERROR: Invalid file")
+# CHECK: ERROR: Invalid file
+
+# Error: Invalid vocab file
+try:
+    ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath="/bad.json")
+except ValueError:
+    print("ERROR: Invalid vocab")
+# CHECK: ERROR: Invalid vocab
+
+# Error: Malformed JSON vocab
+import tempfile
+import os
+with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+    f.write("{ this is not valid json }")
+    bad_vocab = f.name
+try:
+    ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=bad_vocab)
+except ValueError:
+    print("ERROR: Invalid vocab file")
+finally:
+    os.unlink(bad_vocab)
+# CHECK: ERROR: Invalid vocab file
\ No newline at end of file



More information about the llvm-commits mailing list