[llvm] [IR2Vec] Adding support for Demangled names lookup to llvm-ir2vec.cpp (PR #172427)

Nishant Sachdeva via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 16 22:58:18 PST 2025


https://github.com/nishant-sachdeva updated https://github.com/llvm/llvm-project/pull/172427

>From b38fb06692d13f4441abb4a6066491d7be54e47a Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Tue, 16 Dec 2025 12:02:41 +0530
Subject: [PATCH 1/3] Adding support for Demangled names lookup to
 llvm-ir2vec.cpp

---
 llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
index 7b8d3f093a3d1..3effd7657a883 100644
--- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
+++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
@@ -55,7 +55,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/IR2Vec.h"
+#include "llvm/Demangle/Demangle.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
@@ -336,7 +338,17 @@ Error processModule(Module &M, raw_ostream &OS) {
 
     if (!FunctionName.empty()) {
       // Process single function
-      if (const Function *F = M.getFunction(FunctionName))
+      const Function *F = [&]() -> const Function * {
+        if (auto *ExactMatch = M.getFunction(FunctionName)) return ExactMatch;
+
+        const auto Demangled = llvm::demangle(FunctionName);
+        auto It = llvm::find_if(M, [&](const Function &Func) {
+          return llvm::demangle(Func.getName().str()) == Demangled;
+        });
+
+        return (It != M.end()) ? &*It : nullptr;
+      }();
+      if (F)
         Tool.generateEmbeddings(*F, OS);
       else
         return createStringError(errc::invalid_argument,
@@ -727,7 +739,17 @@ int main(int argc, char **argv) {
     } else if (EmbeddingsSubCmd) {
       if (!FunctionName.empty()) {
         // Process single function
-        Function *F = M->getFunction(FunctionName);
+        const Function *F = [&]() -> const Function * {
+          if (auto *ExactMatch = M->getFunction(FunctionName)) return ExactMatch;
+
+          const auto Demangled = llvm::demangle(FunctionName);
+          auto It = llvm::find_if(*M, [&](const Function &Func) {
+            return llvm::demangle(Func.getName().str()) == Demangled;
+          });
+
+          return (It != M->end()) ? &*It : nullptr;  // Change M.end() to M->end()
+        }();
+
         if (!F) {
           WithColor::error(errs(), ToolName)
               << "Function '" << FunctionName << "' not found\n";

>From 951480a4dcdc8fa96d77c6bba07e41f0fbffdede Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Tue, 16 Dec 2025 14:54:04 +0530
Subject: [PATCH 2/3] Fixup commit - added test case for IR2Vec demangled names

---
 .../tools/llvm-ir2vec/embeddings-demangled.ll | 139 ++++++++++++++++++
 llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp        |   1 -
 2 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/tools/llvm-ir2vec/embeddings-demangled.ll

diff --git a/llvm/test/tools/llvm-ir2vec/embeddings-demangled.ll b/llvm/test/tools/llvm-ir2vec/embeddings-demangled.ll
new file mode 100644
index 0000000000000..ebbd8741d9aab
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/embeddings-demangled.ll
@@ -0,0 +1,139 @@
+; RUN: llvm-ir2vec embeddings --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEFAULT
+; RUN: llvm-ir2vec embeddings --level=func --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL
+
+; Test with mangled names
+; RUN: llvm-ir2vec embeddings --level=func --function=_Z3addii --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-ADD-INT-INT
+; RUN: llvm-ir2vec embeddings --level=func --function=_Z3addiii --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-ADD-INT-INT-INT
+; RUN: llvm-ir2vec embeddings --level=func --function=_Z3adddd --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-ADD-DOUBLE-DOUBLE
+; RUN: llvm-ir2vec embeddings --level=func --function=main --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-MAIN
+
+; Test with demangled names
+; RUN: llvm-ir2vec embeddings --level=func --function="add(int, int)" --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEMANGLED-INT-INT
+; RUN: llvm-ir2vec embeddings --level=func --function="add(int, int, int)" --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEMANGLED-INT-INT-INT
+; RUN: llvm-ir2vec embeddings --level=func --function="add(double, double)" --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEMANGLED-DOUBLE-DOUBLE
+
+; Test basic block level for one function
+; RUN: llvm-ir2vec embeddings --level=bb --function=_Z3addii --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-ADD-INT-INT
+
+; Test instruction level for one function
+; RUN: llvm-ir2vec embeddings --level=inst --function=_Z3addiii --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-INST-ADD-INT-INT-INT
+
+; Test error case - non-existent function
+; RUN: not llvm-ir2vec embeddings --level=func --function=_Z3subii --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=CHECK-NONEXISTENT
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+; add(int, int)
+define dso_local noundef i32 @_Z3addii(i32 noundef %0, i32 noundef %1) #0 {
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  store i32 %0, ptr %3, align 4
+  store i32 %1, ptr %4, align 4
+  %5 = load i32, ptr %3, align 4
+  %6 = load i32, ptr %4, align 4
+  %7 = add nsw i32 %5, %6
+  ret i32 %7
+}
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+; add(int, int, int)
+define dso_local noundef i32 @_Z3addiii(i32 noundef %0, i32 noundef %1, i32 noundef %2) #0 {
+  %4 = alloca i32, align 4
+  %5 = alloca i32, align 4
+  %6 = alloca i32, align 4
+  store i32 %0, ptr %4, align 4
+  store i32 %1, ptr %5, align 4
+  store i32 %2, ptr %6, align 4
+  %7 = load i32, ptr %4, align 4
+  %8 = load i32, ptr %5, align 4
+  %9 = add nsw i32 %7, %8
+  %10 = load i32, ptr %6, align 4
+  %11 = add nsw i32 %9, %10
+  ret i32 %11
+}
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+; add(double, double)
+define dso_local noundef double @_Z3adddd(double noundef %0, double noundef %1) #0 {
+  %3 = alloca double, align 8
+  %4 = alloca double, align 8
+  store double %0, ptr %3, align 8
+  store double %1, ptr %4, align 8
+  %5 = load double, ptr %3, align 8
+  %6 = load double, ptr %4, align 8
+  %7 = fadd double %5, %6
+  ret double %7
+}
+
+; Function Attrs: mustprogress noinline norecurse nounwind optnone uwtable
+define dso_local noundef i32 @main() #1 {
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = alloca i32, align 4
+  %4 = alloca double, align 8
+  store i32 0, ptr %1, align 4
+  %5 = call noundef i32 @_Z3addii(i32 noundef 5, i32 noundef 3)
+  store i32 %5, ptr %2, align 4
+  %6 = call noundef i32 @_Z3addiii(i32 noundef 5, i32 noundef 3, i32 noundef 2)
+  store i32 %6, ptr %3, align 4
+  %7 = call noundef double @_Z3adddd(double noundef 5.500000e+00, double noundef 3.200000e+00)
+  store double %7, ptr %4, align 8
+  ret i32 0
+}
+
+; CHECK-DEFAULT: Function: _Z3addii
+; CHECK-DEFAULT-NEXT: [ 1743.00  1764.00  1785.00 ]
+; CHECK-DEFAULT-NEXT: Function: _Z3addiii
+; CHECK-DEFAULT-NEXT: [ 3058.00  3095.00  3132.00 ]
+; CHECK-DEFAULT-NEXT: Function: _Z3adddd
+; CHECK-DEFAULT-NEXT: [ 1749.00  1770.00  1791.00 ]
+; CHECK-DEFAULT-NEXT: Function: main
+; CHECK-DEFAULT-NEXT: [ 2113.00  2132.00  2151.00 ]
+
+; CHECK-FUNC-LEVEL: Function: _Z3addii
+; CHECK-FUNC-LEVEL-NEXT: [ 1743.00  1764.00  1785.00 ]
+; CHECK-FUNC-LEVEL-NEXT: Function: _Z3addiii
+; CHECK-FUNC-LEVEL-NEXT: [ 3058.00  3095.00  3132.00 ]
+; CHECK-FUNC-LEVEL-NEXT: Function: _Z3adddd
+; CHECK-FUNC-LEVEL-NEXT: [ 1749.00  1770.00  1791.00 ]
+; CHECK-FUNC-LEVEL-NEXT: Function: main
+; CHECK-FUNC-LEVEL-NEXT: [ 2113.00  2132.00  2151.00 ]
+
+; CHECK-ADD-INT-INT: Function: _Z3addii
+; CHECK-ADD-INT-INT-NEXT: [ 1743.00  1764.00  1785.00 ]
+
+; CHECK-ADD-INT-INT-INT: Function: _Z3addiii
+; CHECK-ADD-INT-INT-INT-NEXT: [ 3058.00  3095.00  3132.00 ]
+
+; CHECK-ADD-DOUBLE-DOUBLE: Function: _Z3adddd
+; CHECK-ADD-DOUBLE-DOUBLE-NEXT: [ 1749.00  1770.00  1791.00 ]
+
+; CHECK-MAIN: Function: main
+; CHECK-MAIN-NEXT: [ 2113.00  2132.00  2151.00 ]
+
+; CHECK-DEMANGLED-INT-INT: Function: _Z3addii
+; CHECK-DEMANGLED-INT-INT-NEXT: [ 1743.00  1764.00  1785.00 ]
+
+; CHECK-DEMANGLED-INT-INT-INT: Function: _Z3addiii
+; CHECK-DEMANGLED-INT-INT-INT-NEXT: [ 3058.00  3095.00  3132.00 ]
+
+; CHECK-DEMANGLED-DOUBLE-DOUBLE: Function: _Z3adddd
+; CHECK-DEMANGLED-DOUBLE-DOUBLE-NEXT: [ 1749.00  1770.00  1791.00 ]
+
+; CHECK-BB-ADD-INT-INT: Function: _Z3addii
+; CHECK-BB-ADD-INT-INT-NEXT: [ 1743.00  1764.00  1785.00 ]
+
+; CHECK-INST-ADD-INT-INT-INT: Function: _Z3addiii
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %4 = alloca i32, align 4 [ 91.00  92.00  93.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %5 = alloca i32, align 4 [ 91.00  92.00  93.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %6 = alloca i32, align 4 [ 91.00  92.00  93.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: store i32 %0, ptr %4, align 4 [ 188.00  190.00  192.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: store i32 %1, ptr %5, align 4 [ 188.00  190.00  192.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: store i32 %2, ptr %6, align 4 [ 188.00  190.00  192.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %7 = load i32, ptr %4, align 4 [ 185.00  187.00  189.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %8 = load i32, ptr %5, align 4 [ 185.00  187.00  189.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %9 = add nsw i32 %7, %8 [ 407.00  412.00  417.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %10 = load i32, ptr %6, align 4 [ 185.00  187.00  189.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %11 = add nsw i32 %9, %10 [ 629.00  637.00  645.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: ret i32 %11 [ 630.00  639.00  648.00 ]
+
+; CHECK-NONEXISTENT: error: Function '_Z3subii' not found
diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
index 3effd7657a883..15668778357eb 100644
--- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
+++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
@@ -340,7 +340,6 @@ Error processModule(Module &M, raw_ostream &OS) {
       // Process single function
       const Function *F = [&]() -> const Function * {
         if (auto *ExactMatch = M.getFunction(FunctionName)) return ExactMatch;
-
         const auto Demangled = llvm::demangle(FunctionName);
         auto It = llvm::find_if(M, [&](const Function &Func) {
           return llvm::demangle(Func.getName().str()) == Demangled;

>From 9ac341e7c60b16a90f92710698fa484ed1f8c149 Mon Sep 17 00:00:00 2001
From: nishant-sachdeva <nishant.sachdeva at research.iiit.ac.in>
Date: Wed, 17 Dec 2025 12:27:59 +0530
Subject: [PATCH 3/3] Nit commit - formatting fixup

---
 llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
index 15668778357eb..6119ef35e7e59 100644
--- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
+++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
@@ -339,7 +339,8 @@ Error processModule(Module &M, raw_ostream &OS) {
     if (!FunctionName.empty()) {
       // Process single function
       const Function *F = [&]() -> const Function * {
-        if (auto *ExactMatch = M.getFunction(FunctionName)) return ExactMatch;
+        if (auto *ExactMatch = M.getFunction(FunctionName))
+          return ExactMatch;
         const auto Demangled = llvm::demangle(FunctionName);
         auto It = llvm::find_if(M, [&](const Function &Func) {
           return llvm::demangle(Func.getName().str()) == Demangled;
@@ -739,14 +740,16 @@ int main(int argc, char **argv) {
       if (!FunctionName.empty()) {
         // Process single function
         const Function *F = [&]() -> const Function * {
-          if (auto *ExactMatch = M->getFunction(FunctionName)) return ExactMatch;
+          if (auto *ExactMatch = M->getFunction(FunctionName))
+            return ExactMatch;
 
           const auto Demangled = llvm::demangle(FunctionName);
           auto It = llvm::find_if(*M, [&](const Function &Func) {
             return llvm::demangle(Func.getName().str()) == Demangled;
           });
 
-          return (It != M->end()) ? &*It : nullptr;  // Change M.end() to M->end()
+          return (It != M->end()) ? &*It
+                                  : nullptr; // Change M.end() to M->end()
         }();
 
         if (!F) {



More information about the llvm-commits mailing list