[llvm] [llvm-ir2vec] Refactoring the ir2vec python bindings testing (PR #180664)
Nishant Sachdeva via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 6 05:36:20 PST 2026
https://github.com/nishant-sachdeva updated https://github.com/llvm/llvm-project/pull/180664
From 24aebd987f253de226ba10232ad9e717b2020705 Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Mon, 9 Feb 2026 19:42:23 +0530
Subject: [PATCH 1/2] Refactoring the ir2vec python bindings testing to make it
more modular and thorough
---
.../llvm-ir2vec/bindings/ir2vec-exceptions.py | 38 -------------
.../bindings/ir2vec-getBBEmbMap.py | 25 +++++++++
.../llvm-ir2vec/bindings/ir2vec-getFuncEmb.py | 21 ++++++++
.../bindings/ir2vec-getFuncEmbMap.py | 18 +++++++
.../bindings/ir2vec-getFuncNames.py | 18 +++++++
.../bindings/ir2vec-getInstEmbMap.py | 27 ++++++++++
.../bindings/ir2vec-initEmbedding.py | 54 +++++++++++++++++++
7 files changed, 163 insertions(+), 38 deletions(-)
delete mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
create mode 100644 llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
deleted file mode 100644
index af96be07c2364..0000000000000
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-exceptions.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# RUN: env PYTHONPATH=%llvm_lib_dir %python %s | FileCheck %s
-
-import ir2vec
-
-
-def test_invalid_file():
- """Test that invalid file path raises ValueError"""
- try:
- tool = ir2vec.initEmbedding(
- filename="/this/does/not/exist.ll",
- mode="sym",
- vocabPath="/also/fake/vocab.json",
- )
- return "FAIL: No exception raised"
- except ValueError as e:
- return f"PASS: ValueError raised - {str(e)[:40]}"
- except Exception as e:
- return f"FAIL: Wrong exception - {type(e).__name__}"
-
-
-def test_empty_filename():
- """Test that empty filename raises ValueError"""
- try:
- tool = ir2vec.initEmbedding(filename="", mode="sym", vocabPath="dummy.json")
- return "FAIL: No exception raised"
- except ValueError:
- return "PASS: ValueError raised for empty filename"
- except Exception as e:
- return f"FAIL: Wrong exception - {type(e).__name__}"
-
-
-result1 = test_invalid_file()
-print(f"Test 1: {result1}")
-# CHECK: Test 1: PASS: ValueError raised
-
-result2 = test_empty_filename()
-print(f"Test 2: {result2}")
-# CHECK: Test 2: PASS: ValueError raised
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
new file mode 100644
index 0000000000000..415046a391dc7
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getBBEmbMap.py
@@ -0,0 +1,25 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+bb_map = tool.getBBEmbMap("conditional")
+for bb in sorted(bb_map.keys()):
+ print(f"BB: {bb}, EMB: {bb_map[bb].tolist()}")
+# CHECK: BB: entry, EMB: [161.20000000298023, 163.20000000298023, 165.20000000298023]
+# CHECK: BB: exit, EMB: [164.0, 166.0, 168.0]
+# CHECK: BB: negative, EMB: [47.0, 49.0, 51.0]
+# CHECK: BB: positive, EMB: [41.0, 43.0, 45.0]
+
+# Error: Function not found
+try:
+ tool.getBBEmbMap("nonexistent")
+except ValueError:
+ print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
new file mode 100644
index 0000000000000..9f72870408837
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmb.py
@@ -0,0 +1,21 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+emb = tool.getFuncEmb("add")
+print(f"SUCCESS: {emb.tolist()}")
+# CHECK: SUCCESS: [38.0, 40.0, 42.0]
+
+# Error: Function not found
+try:
+ tool.getFuncEmb("nonexistent")
+except ValueError:
+ print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
new file mode 100644
index 0000000000000..a306a652ac9bd
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncEmbMap.py
@@ -0,0 +1,18 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+emb_map = tool.getFuncEmbMap()
+for name in sorted(emb_map.keys()):
+ print(f"FUNC: {name}, EMB: {emb_map[name].tolist()}")
+
+# CHECK: FUNC: add, EMB: [38.0, 40.0, 42.0]
+# CHECK: FUNC: conditional, EMB: [413.20000000298023, 421.20000000298023, 429.20000000298023]
+# CHECK: FUNC: multiply, EMB: [50.0, 52.0, 54.0]
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
new file mode 100644
index 0000000000000..b121d24ba896f
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getFuncNames.py
@@ -0,0 +1,18 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+func_names = tool.getFuncNames()
+for name in sorted(func_names):
+ print(f"FUNC: {name}")
+
+# CHECK: FUNC: add
+# CHECK: FUNC: conditional
+# CHECK: FUNC: multiply
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
new file mode 100644
index 0000000000000..1af41a803e551
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-getInstEmbMap.py
@@ -0,0 +1,27 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+
+# Success case
+inst_map = tool.getInstEmbMap("add")
+for inst in sorted(inst_map.keys()):
+ print(f"INST: {inst}")
+ print(f" EMB: {inst_map[inst].tolist()}")
+
+# CHECK: INST: %sum = add i32 %a, %b
+# CHECK: EMB: [37.0, 38.0, 39.0]
+# CHECK: INST: ret i32 %sum
+# CHECK: EMB: [1.0, 2.0, 3.0]
+
+# Error: Function not found
+try:
+ tool.getInstEmbMap("nonexistent")
+except ValueError:
+ print("ERROR: Function not found")
+# CHECK: ERROR: Function not found
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
new file mode 100644
index 0000000000000..f35c118a3c3d1
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
@@ -0,0 +1,54 @@
+# RUN: env PYTHONPATH=%llvm_lib_dir %python %s %S/../Inputs/input.ll %ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json | FileCheck %s
+
+import sys
+import ir2vec
+
+ll_file = sys.argv[1]
+vocab_path = sys.argv[2]
+
+# Success case
+tool = ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=vocab_path)
+print(f"SUCCESS: {type(tool).__name__}")
+# CHECK: SUCCESS: IR2VecTool
+
+# Error: Invalid mode
+try:
+ ir2vec.initEmbedding(filename=ll_file, mode="invalid", vocabPath=vocab_path)
+except ValueError:
+ print("ERROR: Invalid mode")
+# CHECK: ERROR: Invalid mode
+
+# Error: Empty vocab path
+try:
+ ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath="")
+except ValueError:
+ print("ERROR: Empty vocab path")
+# CHECK: ERROR: Empty vocab path
+
+# Error: Invalid file
+try:
+ ir2vec.initEmbedding(filename="/bad.ll", mode="sym", vocabPath=vocab_path)
+except ValueError:
+ print("ERROR: Invalid file")
+# CHECK: ERROR: Invalid file
+
+# Error: Invalid vocab file
+try:
+ ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath="/bad.json")
+except ValueError:
+ print("ERROR: Invalid vocab")
+# CHECK: ERROR: Invalid vocab
+
+# Error: Malformed JSON vocab
+import tempfile
+import os
+with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+ f.write("{ this is not valid json }")
+ bad_vocab = f.name
+try:
+ ir2vec.initEmbedding(filename=ll_file, mode="sym", vocabPath=bad_vocab)
+except ValueError:
+ print("ERROR: Invalid vocab file")
+finally:
+ os.unlink(bad_vocab)
+# CHECK: ERROR: Invalid vocab file
\ No newline at end of file
From deb4645dcda937373d4d177f2947c5243e7aca35 Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Fri, 6 Mar 2026 19:05:35 +0530
Subject: [PATCH 2/2] Formatting nits
---
llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
index f35c118a3c3d1..98996a2859a93 100644
--- a/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
+++ b/llvm/test/tools/llvm-ir2vec/bindings/ir2vec-initEmbedding.py
@@ -42,7 +42,8 @@
# Error: Malformed JSON vocab
import tempfile
import os
-with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+
+with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
f.write("{ this is not valid json }")
bad_vocab = f.name
try:
@@ -51,4 +52,4 @@
print("ERROR: Invalid vocab file")
finally:
os.unlink(bad_vocab)
-# CHECK: ERROR: Invalid vocab file
\ No newline at end of file
+# CHECK: ERROR: Invalid vocab file
More information about the llvm-commits mailing list