[llvm] [IR2Vec] Adding support for Demangled names lookup to llvm_ir2vec.cpp (PR #172427)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 16 23:08:25 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlgo
Author: Nishant Sachdeva (nishant-sachdeva)
<details>
<summary>Changes</summary>
# Adding Support for Demangled Function Names in llvm-ir2vec
- Addresses https://github.com/llvm/llvm-project/issues/159170
## Summary
Enhanced `llvm-ir2vec` to accept demangled function signatures in addition to mangled names from the IR when using the `--function` option. This improves usability by allowing users to specify functions using human-readable signatures instead of requiring knowledge of name mangling conventions. This is especially useful for modules whose source code contains overloaded functions.
## Changes
**Function Lookup Enhancement:**
- Added a fallback lookup mechanism that searches by demangled name only when the exact-match lookup fails
- Implemented using `llvm::demangle()` and `llvm::find_if()`
- Applied consistently to both LLVM IR and MIR processing paths
**Supported Input Formats:**
Users can now specify functions using any of these formats:
- Mangled names: `--function=_Z3fooif` (mangled name, direct lookup)
- Demangled signatures: `--function="foo(int, float)"` (demangled name, fallback search)
**Design Decisions:**
- Base names without signatures (e.g., `--function=foo`) are intentionally not supported to avoid ambiguity with function overloads
- Users must provide either the full mangled name or the complete demangled signature to uniquely identify a function
- Maintains backward compatibility with existing mangled name inputs
## Example Usage
```bash
# Before (only mangled names worked)
llvm-ir2vec embeddings --function=_Z3abcif input.ll
# After (both mangled and demangled work)
llvm-ir2vec embeddings --function=_Z3abcif input.ll
llvm-ir2vec embeddings --function="abc(int, float)" input.ll
```
---
Full diff: https://github.com/llvm/llvm-project/pull/172427.diff
2 Files Affected:
- (added) llvm/test/tools/llvm-ir2vec/embeddings-demangled.ll (+139)
- (modified) llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp (+26-2)
``````````diff
diff --git a/llvm/test/tools/llvm-ir2vec/embeddings-demangled.ll b/llvm/test/tools/llvm-ir2vec/embeddings-demangled.ll
new file mode 100644
index 0000000000000..ebbd8741d9aab
--- /dev/null
+++ b/llvm/test/tools/llvm-ir2vec/embeddings-demangled.ll
@@ -0,0 +1,139 @@
+; RUN: llvm-ir2vec embeddings --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEFAULT
+; RUN: llvm-ir2vec embeddings --level=func --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL
+
+; Test with mangled names
+; RUN: llvm-ir2vec embeddings --level=func --function=_Z3addii --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-ADD-INT-INT
+; RUN: llvm-ir2vec embeddings --level=func --function=_Z3addiii --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-ADD-INT-INT-INT
+; RUN: llvm-ir2vec embeddings --level=func --function=_Z3adddd --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-ADD-DOUBLE-DOUBLE
+; RUN: llvm-ir2vec embeddings --level=func --function=main --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-MAIN
+
+; Test with demangled names
+; RUN: llvm-ir2vec embeddings --level=func --function="add(int, int)" --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEMANGLED-INT-INT
+; RUN: llvm-ir2vec embeddings --level=func --function="add(int, int, int)" --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEMANGLED-INT-INT-INT
+; RUN: llvm-ir2vec embeddings --level=func --function="add(double, double)" --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEMANGLED-DOUBLE-DOUBLE
+
+; Test basic block level for one function
+; RUN: llvm-ir2vec embeddings --level=bb --function=_Z3addii --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-ADD-INT-INT
+
+; Test instruction level for one function
+; RUN: llvm-ir2vec embeddings --level=inst --function=_Z3addiii --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-INST-ADD-INT-INT-INT
+
+; Test error case - non-existent function
+; RUN: not llvm-ir2vec embeddings --level=func --function=_Z3subii --ir2vec-kind=flow-aware --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=CHECK-NONEXISTENT
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+; add(int, int)
+define dso_local noundef i32 @_Z3addii(i32 noundef %0, i32 noundef %1) #0 {
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ store i32 %0, ptr %3, align 4
+ store i32 %1, ptr %4, align 4
+ %5 = load i32, ptr %3, align 4
+ %6 = load i32, ptr %4, align 4
+ %7 = add nsw i32 %5, %6
+ ret i32 %7
+}
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+; add(int, int, int)
+define dso_local noundef i32 @_Z3addiii(i32 noundef %0, i32 noundef %1, i32 noundef %2) #0 {
+ %4 = alloca i32, align 4
+ %5 = alloca i32, align 4
+ %6 = alloca i32, align 4
+ store i32 %0, ptr %4, align 4
+ store i32 %1, ptr %5, align 4
+ store i32 %2, ptr %6, align 4
+ %7 = load i32, ptr %4, align 4
+ %8 = load i32, ptr %5, align 4
+ %9 = add nsw i32 %7, %8
+ %10 = load i32, ptr %6, align 4
+ %11 = add nsw i32 %9, %10
+ ret i32 %11
+}
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+; add(double, double)
+define dso_local noundef double @_Z3adddd(double noundef %0, double noundef %1) #0 {
+ %3 = alloca double, align 8
+ %4 = alloca double, align 8
+ store double %0, ptr %3, align 8
+ store double %1, ptr %4, align 8
+ %5 = load double, ptr %3, align 8
+ %6 = load double, ptr %4, align 8
+ %7 = fadd double %5, %6
+ ret double %7
+}
+
+; Function Attrs: mustprogress noinline norecurse nounwind optnone uwtable
+define dso_local noundef i32 @main() #1 {
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ %4 = alloca double, align 8
+ store i32 0, ptr %1, align 4
+ %5 = call noundef i32 @_Z3addii(i32 noundef 5, i32 noundef 3)
+ store i32 %5, ptr %2, align 4
+ %6 = call noundef i32 @_Z3addiii(i32 noundef 5, i32 noundef 3, i32 noundef 2)
+ store i32 %6, ptr %3, align 4
+ %7 = call noundef double @_Z3adddd(double noundef 5.500000e+00, double noundef 3.200000e+00)
+ store double %7, ptr %4, align 8
+ ret i32 0
+}
+
+; CHECK-DEFAULT: Function: _Z3addii
+; CHECK-DEFAULT-NEXT: [ 1743.00 1764.00 1785.00 ]
+; CHECK-DEFAULT-NEXT: Function: _Z3addiii
+; CHECK-DEFAULT-NEXT: [ 3058.00 3095.00 3132.00 ]
+; CHECK-DEFAULT-NEXT: Function: _Z3adddd
+; CHECK-DEFAULT-NEXT: [ 1749.00 1770.00 1791.00 ]
+; CHECK-DEFAULT-NEXT: Function: main
+; CHECK-DEFAULT-NEXT: [ 2113.00 2132.00 2151.00 ]
+
+; CHECK-FUNC-LEVEL: Function: _Z3addii
+; CHECK-FUNC-LEVEL-NEXT: [ 1743.00 1764.00 1785.00 ]
+; CHECK-FUNC-LEVEL-NEXT: Function: _Z3addiii
+; CHECK-FUNC-LEVEL-NEXT: [ 3058.00 3095.00 3132.00 ]
+; CHECK-FUNC-LEVEL-NEXT: Function: _Z3adddd
+; CHECK-FUNC-LEVEL-NEXT: [ 1749.00 1770.00 1791.00 ]
+; CHECK-FUNC-LEVEL-NEXT: Function: main
+; CHECK-FUNC-LEVEL-NEXT: [ 2113.00 2132.00 2151.00 ]
+
+; CHECK-ADD-INT-INT: Function: _Z3addii
+; CHECK-ADD-INT-INT-NEXT: [ 1743.00 1764.00 1785.00 ]
+
+; CHECK-ADD-INT-INT-INT: Function: _Z3addiii
+; CHECK-ADD-INT-INT-INT-NEXT: [ 3058.00 3095.00 3132.00 ]
+
+; CHECK-ADD-DOUBLE-DOUBLE: Function: _Z3adddd
+; CHECK-ADD-DOUBLE-DOUBLE-NEXT: [ 1749.00 1770.00 1791.00 ]
+
+; CHECK-MAIN: Function: main
+; CHECK-MAIN-NEXT: [ 2113.00 2132.00 2151.00 ]
+
+; CHECK-DEMANGLED-INT-INT: Function: _Z3addii
+; CHECK-DEMANGLED-INT-INT-NEXT: [ 1743.00 1764.00 1785.00 ]
+
+; CHECK-DEMANGLED-INT-INT-INT: Function: _Z3addiii
+; CHECK-DEMANGLED-INT-INT-INT-NEXT: [ 3058.00 3095.00 3132.00 ]
+
+; CHECK-DEMANGLED-DOUBLE-DOUBLE: Function: _Z3adddd
+; CHECK-DEMANGLED-DOUBLE-DOUBLE-NEXT: [ 1749.00 1770.00 1791.00 ]
+
+; CHECK-BB-ADD-INT-INT: Function: _Z3addii
+; CHECK-BB-ADD-INT-INT-NEXT: [ 1743.00 1764.00 1785.00 ]
+
+; CHECK-INST-ADD-INT-INT-INT: Function: _Z3addiii
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %4 = alloca i32, align 4 [ 91.00 92.00 93.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %5 = alloca i32, align 4 [ 91.00 92.00 93.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %6 = alloca i32, align 4 [ 91.00 92.00 93.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: store i32 %0, ptr %4, align 4 [ 188.00 190.00 192.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: store i32 %1, ptr %5, align 4 [ 188.00 190.00 192.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: store i32 %2, ptr %6, align 4 [ 188.00 190.00 192.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %7 = load i32, ptr %4, align 4 [ 185.00 187.00 189.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %8 = load i32, ptr %5, align 4 [ 185.00 187.00 189.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %9 = add nsw i32 %7, %8 [ 407.00 412.00 417.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %10 = load i32, ptr %6, align 4 [ 185.00 187.00 189.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: %11 = add nsw i32 %9, %10 [ 629.00 637.00 645.00 ]
+; CHECK-INST-ADD-INT-INT-INT-NEXT: ret i32 %11 [ 630.00 639.00 648.00 ]
+
+; CHECK-NONEXISTENT: error: Function '_Z3subii' not found
diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
index 7b8d3f093a3d1..6119ef35e7e59 100644
--- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
+++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
@@ -55,7 +55,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/IR2Vec.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
@@ -336,7 +338,17 @@ Error processModule(Module &M, raw_ostream &OS) {
if (!FunctionName.empty()) {
// Process single function
- if (const Function *F = M.getFunction(FunctionName))
+ const Function *F = [&]() -> const Function * {
+ if (auto *ExactMatch = M.getFunction(FunctionName))
+ return ExactMatch;
+ const auto Demangled = llvm::demangle(FunctionName);
+ auto It = llvm::find_if(M, [&](const Function &Func) {
+ return llvm::demangle(Func.getName().str()) == Demangled;
+ });
+
+ return (It != M.end()) ? &*It : nullptr;
+ }();
+ if (F)
Tool.generateEmbeddings(*F, OS);
else
return createStringError(errc::invalid_argument,
@@ -727,7 +739,19 @@ int main(int argc, char **argv) {
} else if (EmbeddingsSubCmd) {
if (!FunctionName.empty()) {
// Process single function
- Function *F = M->getFunction(FunctionName);
+ const Function *F = [&]() -> const Function * {
+ if (auto *ExactMatch = M->getFunction(FunctionName))
+ return ExactMatch;
+
+ const auto Demangled = llvm::demangle(FunctionName);
+ auto It = llvm::find_if(*M, [&](const Function &Func) {
+ return llvm::demangle(Func.getName().str()) == Demangled;
+ });
+
+ return (It != M->end()) ? &*It
+ : nullptr; // Change M.end() to M->end()
+ }();
+
if (!F) {
WithColor::error(errs(), ToolName)
<< "Function '" << FunctionName << "' not found\n";
``````````
</details>
https://github.com/llvm/llvm-project/pull/172427
More information about the llvm-commits
mailing list