[llvm] [llvm-ir2vec] Refactoring the ir2vec python bindings testing (PR #180664)

Nishant Sachdeva via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 5 09:50:37 PST 2026


https://github.com/nishant-sachdeva updated https://github.com/llvm/llvm-project/pull/180664

>From 8790834286fabc1a17b4a013172f2eb66a1aef5a Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Mon, 9 Feb 2026 12:27:13 +0530
Subject: [PATCH 1/2] Adding getFuncNames API to ir2vec python bindings

---
 .../tools/llvm-ir2vec/bindings/ir2vec-bindings.py     | 10 ++++++++++
 llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp          | 11 +++++++++++
 2 files changed, 21 insertions(+)

diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
index bb29d33dc8ca6..c9c241ccf90c0 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
@@ -12,6 +12,12 @@
     print("SUCCESS: Tool initialized")
     print(f"Tool type: {type(tool).__name__}")
 
+    # Test getFuncNames
+    print("\n=== Function Names ===")
+    func_names = tool.getFuncNames()
+    for func_name in sorted(func_names):
+        print(f"Function: {func_name}")
+
     # Test getFuncEmbMap
     print("\n=== Function Embeddings ===")
     func_emb_map = tool.getFuncEmbMap()
@@ -57,6 +63,10 @@
 
 # CHECK: SUCCESS: Tool initialized
 # CHECK: Tool type: IR2VecTool
+# CHECK: === Function Names ===
+# CHECK: Function: add
+# CHECK: Function: conditional
+# CHECK: Function: multiply
 # CHECK: === Function Embeddings ===
 # CHECK: Function: add
 # CHECK-NEXT:   Embedding: [38.0, 40.0, 42.0]
diff --git a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
index e4ddaf9c14e5a..df372aedb9b63 100644
--- a/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
+++ b/llvm/tools/llvm-ir2vec/Bindings/PyIR2Vec.cpp
@@ -70,6 +70,14 @@ class PyIR2VecTool {
     }
   }
 
+  /// Returns the names of all functions defined in the module as a list[str].
+  nb::list getFuncNames() {
+    nb::list NbFuncNames;
+    for (const Function &F : M->getFunctionDefs())
+      NbFuncNames.append(nb::str(F.getName().data(), F.getName().size()));
+    return NbFuncNames;
+  }
+
   nb::dict getFuncEmbMap() {
     auto ToolFuncEmbMap = Tool->getFunctionEmbeddingsMap(OutputEmbeddingMode);
 
@@ -196,6 +204,9 @@ NB_MODULE(ir2vec, m) {
       .def(nb::init<const std::string &, const std::string &,
                     const std::string &>(),
            nb::arg("filename"), nb::arg("mode"), nb::arg("vocabPath"))
+      .def("getFuncNames", &PyIR2VecTool::getFuncNames,
+           "Get list of all defined functions in the module\n"
+           "Returns: list[str] - Function names")
       .def("getFuncEmbMap", &PyIR2VecTool::getFuncEmbMap,
            "Generate function-level embeddings for all functions\n"
            "Returns: dict[str, ndarray[float64]] - "

>From f6886efad108cd8ac2df97c1f8ad57fbb0055690 Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Mon, 9 Feb 2026 19:42:23 +0530
Subject: [PATCH 2/2] Refactoring the ir2vec python bindings testing to make it
 more modular and thorough

---
 .../llvm-ir2vec/bindings/ir2vec-bindings.py   | 125 ------------------
 .../llvm-ir2vec/bindings/ir2vec-exceptions.py |  38 ------
 .../bindings/ir2vec-getBBEmbMap.py            |  25 ++++
 .../llvm-ir2vec/bindings/ir2vec-getFuncEmb.py |  21 +++
 .../bindings/ir2vec-getFuncEmbMap.py          |  18 +++
 .../bindings/ir2vec-getFuncNames.py           |  18 +++
 .../bindings/ir2vec-getInstEmbMap.py          |  27 ++++
 .../bindings/ir2vec-initEmbedding.py          |  54 ++++++++
 8 files changed, 163 insertions(+), 163 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
 delete mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
 create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
 create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
 create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
 create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
 create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
 create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py

diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
deleted file mode 100644
index c9c241ccf90c0..0000000000000
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-bindings.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
-
-import sys
-import ir2vec
-
-ll_file = sys.argv[1]
-vocab_path = sys.argv[2]
-
-tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
-
-if tool is not None:
-    print("SUCCESS: Tool initialized")
-    print(f"Tool type: {type(tool).__name__}")
-
-    # Test getFuncNames
-    print("\n=== Function Names ===")
-    func_names = tool.getFuncNames()
-    for func_name in sorted(func_names):
-        print(f"Function: {func_name}")
-
-    # Test getFuncEmbMap
-    print("\n=== Function Embeddings ===")
-    func_emb_map = tool.getFuncEmbMap()
-
-    # Sorting the function names for fixed-ordered output
-    for func_name in sorted(func_emb_map.keys()):
-        emb = func_emb_map[func_name]
-        print(f"Function: {func_name}")
-        print(f"  Embedding: {emb.tolist()}")
-
-    # Test getFuncEmb for individual functions
-    print("\n=== Single Function Embeddings ===")
-
-    # Test valid function names
-    for func_name in ["add", "multiply", "conditional"]:
-        func_emb = tool.getFuncEmb(func_name)
-        print(f"Function: {func_name}")
-        print(f"  Embedding: {func_emb.tolist()}")
-
-    # Test getBBEmbMap
-    print("\n=== Basic Block Embeddings ===")
-
-    # Test valid function names in sorted order
-    for func_name in sorted(["add", "multiply", "conditional"]):
-        bb_emb_map = tool.getBBEmbMap(func_name)
-        print(f"Function: {func_name}")
-        for bb_name in sorted(bb_emb_map.keys()):
-            emb = bb_emb_map[bb_name]
-            print(f"  BB: {bb_name}")
-            print(f"    Embedding: {emb.tolist()}")
-
-    # Test getInstEmbMap
-    print("\n=== Instruction Embeddings ===")
-
-    # Test valid function names in sorted order
-    for func_name in sorted(["add", "multiply", "conditional"]):
-        inst_emb_map = tool.getInstEmbMap(func_name)
-        print(f"Function: {func_name}")
-        for inst_str in sorted(inst_emb_map.keys()):
-            emb = inst_emb_map[inst_str]
-            print(f"  Inst: {inst_str}")
-            print(f"    Embedding: {emb.tolist()}")
-
-# CHECK: SUCCESS: Tool initialized
-# CHECK: Tool type: IR2VecTool
-# CHECK: === Function Names ===
-# CHECK: Function: add
-# CHECK: Function: conditional
-# CHECK: Function: multiply
-# CHECK: === Function Embeddings ===
-# CHECK: Function: add
-# CHECK-NEXT:   Embedding: [38.0, 40.0, 42.0]
-# CHECK: Function: conditional
-# CHECK-NEXT:   Embedding: [413.20000000298023, 421.20000000298023, 429.20000000298023]
-# CHECK: Function: multiply
-# CHECK-NEXT:   Embedding: [50.0, 52.0, 54.0]
-# CHECK: === Single Function Embeddings ===
-# CHECK: Function: add
-# CHECK-NEXT:   Embedding: [38.0, 40.0, 42.0]
-# CHECK: Function: multiply
-# CHECK-NEXT:   Embedding: [50.0, 52.0, 54.0]
-# CHECK: Function: conditional
-# CHECK-NEXT:   Embedding: [413.20000000298023, 421.20000000298023, 429.20000000298023]
-# CHECK: === Basic Block Embeddings ===
-# CHECK: Function: add
-# CHECK:   BB: entry
-# CHECK-NEXT:     Embedding: [38.0, 40.0, 42.0]
-# CHECK: Function: conditional
-# CHECK:   BB: entry
-# CHECK-NEXT:     Embedding: [161.20000000298023, 163.20000000298023, 165.20000000298023]
-# CHECK:   BB: exit
-# CHECK-NEXT:     Embedding: [164.0, 166.0, 168.0]
-# CHECK:   BB: negative
-# CHECK-NEXT:     Embedding: [47.0, 49.0, 51.0]
-# CHECK:   BB: positive
-# CHECK-NEXT:     Embedding: [41.0, 43.0, 45.0]
-# CHECK: Function: multiply
-# CHECK:   BB: entry
-# CHECK-NEXT:     Embedding: [50.0, 52.0, 54.0]
-# CHECK: === Instruction Embeddings ===
-# CHECK: Function: add
-# CHECK:   Inst: %sum = add i32 %a, %b
-# CHECK-NEXT:     Embedding: [37.0, 38.0, 39.0]
-# CHECK:   Inst: ret i32 %sum
-# CHECK-NEXT:     Embedding: [1.0, 2.0, 3.0]
-# CHECK: Function: conditional
-# CHECK:   Inst: %cmp = icmp sgt i32 %n, 0
-# CHECK-NEXT:     Embedding: [157.20000000298023, 158.20000000298023, 159.20000000298023]
-# CHECK:   Inst: %neg_val = sub i32 %n, 10
-# CHECK-NEXT:     Embedding: [43.0, 44.0, 45.0]
-# CHECK:   Inst: %pos_val = add i32 %n, 10
-# CHECK-NEXT:     Embedding: [37.0, 38.0, 39.0]
-# CHECK:   Inst: %result = phi i32 [ %pos_val, %positive ], [ %neg_val, %negative ]
-# CHECK-NEXT:     Embedding: [163.0, 164.0, 165.0]
-# CHECK:   Inst: br i1 %cmp, label %positive, label %negative
-# CHECK-NEXT:     Embedding: [4.0, 5.0, 6.0]
-# CHECK:   Inst: br label %exit
-# CHECK-NEXT:     Embedding: [4.0, 5.0, 6.0]
-# CHECK:   Inst: ret i32 %result
-# CHECK-NEXT:     Embedding: [1.0, 2.0, 3.0]
-# CHECK: Function: multiply
-# CHECK:   Inst: %prod = mul i32 %x, %y
-# CHECK-NEXT:     Embedding: [49.0, 50.0, 51.0]
-# CHECK:   Inst: ret i32 %prod
-# CHECK-NEXT:     Embedding: [1.0, 2.0, 3.0]
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
deleted file mode 100644
index af96be07c2364..0000000000000
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# RUN: env PYTHONPATH=%llvm_lib_dir %python %s | FileCheck %s
-
-import ir2vec
-
-
-def test_invalid_file():
-    """Test that invalid file path raises ValueError"""
-    try:
-        tool = ir2vec.initEmbedding(
-            filename="/this/does/not/exist.ll",
-            mode="sym",
-            vocabPath="/also/fake/vocab.json",
-        )
-        return "FAIL: No exception raised"
-    except ValueError as e:
-        return f"PASS: ValueError raised - {str(e)[:40]}"
-    except Exception as e:
-        return f"FAIL: Wrong exception - {type(e).__name__}"
-
-
-def test_empty_filename():
-    """Test that empty filename raises ValueError"""
-    try:
-        tool = ir2vec.initEmbedding(filename="", mode="sym", vocabPath="dummy.json")
-        return "FAIL: No exception raised"
-    except ValueError:
-        return "PASS: ValueError raised for empty filename"
-    except Exception as e:
-        return f"FAIL: Wrong exception - {type(e).__name__}"
-
-
-result1 = test_invalid_file()
-print(f"Test 1: {result1}")
-# CHECK: Test 1: PASS: ValueError raised
-
-result2 = test_empty_filename()
-print(f"Test 2: {result2}")
-# CHECK: Test 2: PASS: ValueError raised
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
new file mode 100644
index 0000000000000..415046a391dc7
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
@@ -0,0 +1,25 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case: print every basic-block embedding of "conditional", ordered by block name.
+bb_map = tool.getBBEmbMap("conditional")
+for bb in sorted(bb_map.keys()):
+    print(f"BB: {bb}, EMB: {bb_map[bb].tolist()}")
+# CHECK: BB: entry, EMB: [161.20000000298023, 163.20000000298023, 165.20000000298023]
+# CHECK: BB: exit, EMB: [164.0, 166.0, 168.0]
+# CHECK: BB: negative, EMB: [47.0, 49.0, 51.0]
+# CHECK: BB: positive, EMB: [41.0, 43.0, 45.0]
+
+# Error: an unknown function name is expected to raise ValueError.
+try:
+    tool.getBBEmbMap("nonexistent")
+except ValueError:
+    print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
new file mode 100644
index 0000000000000..9f72870408837
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
@@ -0,0 +1,21 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case: print the embedding of a single function looked up by name.
+emb = tool.getFuncEmb("add")
+print(f"SUCCESS: {emb.tolist()}")
+# CHECK: SUCCESS: [38.0, 40.0, 42.0]
+
+# Error: an unknown function name is expected to raise ValueError.
+try:
+    tool.getFuncEmb("nonexistent")
+except ValueError:
+    print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
new file mode 100644
index 0000000000000..a306a652ac9bd
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
@@ -0,0 +1,18 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case: iterate the map's keys in sorted order so the output is deterministic.
+emb_map = tool.getFuncEmbMap()
+for name in sorted(emb_map.keys()):
+    print(f"FUNC: {name}, EMB: {emb_map[name].tolist()}")
+
+# CHECK: FUNC: add, EMB: [38.0, 40.0, 42.0]
+# CHECK: FUNC: conditional, EMB: [413.20000000298023, 421.20000000298023, 429.20000000298023]
+# CHECK: FUNC: multiply, EMB: [50.0, 52.0, 54.0]
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
new file mode 100644
index 0000000000000..b121d24ba896f
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
@@ -0,0 +1,18 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case: sort the returned names so the printed order is deterministic.
+func_names = tool.getFuncNames()
+for name in sorted(func_names):
+    print(f"FUNC: {name}")
+
+# CHECK: FUNC: add
+# CHECK: FUNC: conditional
+# CHECK: FUNC: multiply
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
new file mode 100644
index 0000000000000..1af41a803e551
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
@@ -0,0 +1,27 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+inst_map = tool.getInstEmbMap("add")
+for inst in sorted(inst_map.keys()):
+    print(f"INST: {inst}")
+    print(f"  EMB: {inst_map[inst].tolist()}")
+
+# CHECK: INST: %sum = add i32 %a, %b
+# CHECK:   EMB: [37.0, 38.0, 39.0]
+# CHECK: INST: ret i32 %sum
+# CHECK:   EMB: [1.0, 2.0, 3.0]
+
+# Error: Function not found
+try:
+    tool.getInstEmbMap("nonexistent")
+except ValueError:
+    print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
new file mode 100644
index 0000000000000..f35c118a3c3d1
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
@@ -0,0 +1,54 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+# Success case
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+print(f"SUCCESS: {type(tool).__name__}")
+# CHECK: SUCCESS: IR2VecTool
+
+# Error: Invalid mode
+try:
+    ir2vec.initEmbedding(filename=ll_file, mode="invalid", vocabPath=vocab_path)
+except ValueError:
+    print("ERROR: Invalid mode")
+# CHECK: ERROR: Invalid mode
+
+# Error: Empty vocab path
+try:
+    ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath="")
+except ValueError:
+    print("ERROR: Empty vocab path")
+# CHECK: ERROR: Empty vocab path
+
+# Error: Invalid file
+try:
+    ir2vec.initEmbedding(filename="/bad.ll", mode="sym", vocabPath=vocab_path)
+except ValueError:
+    print("ERROR: Invalid file")
+# CHECK: ERROR: Invalid file
+
+# Error: Invalid vocab file
+try:
+    ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath="/bad.json")
+except ValueError:
+    print("ERROR: Invalid vocab")
+# CHECK: ERROR: Invalid vocab
+
+# Error: Malformed JSON vocab
+import tempfile
+import os
+with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+    f.write("{ this is not valid json }")
+    bad_vocab = f.name
+try:
+    ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=bad_vocab)
+except ValueError:
+    print("ERROR: Invalid vocab file")
+finally:
+    os.unlink(bad_vocab)
+# CHECK: ERROR: Invalid vocab file
\ No newline at end of file



More information about the llvm-commits mailing list