[llvm] Adding IR2Vec as an analysis pass (PR #134004)

Snehasish Kumar via llvm-commits llvm-commits at lists.llvm.org
Fri May 16 14:30:24 PDT 2025


================
@@ -0,0 +1,300 @@
+//===- IR2Vec.cpp - Implementation of IR2Vec -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
+// Exceptions. See the LICENSE file for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the IR2Vec algorithm.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IR2Vec.h"
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace ir2vec;
+
+#define DEBUG_TYPE "ir2vec"
+
+// Statistic registered under DEBUG_TYPE "ir2vec". Per its description it
+// tracks lookups of entities that are not present in the vocabulary.
+STATISTIC(VocabMissCounter,
+          "Number of lookups to entities not present in the vocabulary");
+
+// Command-line category under which the IR2Vec options below are registered.
+static cl::OptionCategory IR2VecCategory("IR2Vec Options");
+
+// Path to the vocabulary file; defaults to the empty string when not given.
+// FIXME: Use a default vocab when not specified
+static cl::opt<std::string>
+    VocabFile("ir2vec-vocab-path", cl::Optional,
+              cl::desc("Path to the vocabulary file for IR2Vec"), cl::init(""),
+              cl::cat(IR2VecCategory));
+// Weights for the opcode, type and argument embeddings. The defaults are 1.0,
+// 0.5 and 0.2 respectively.
+static cl::opt<float> OpcWeight("ir2vec-opc-weight", cl::Optional,
+                                cl::init(1.0),
+                                cl::desc("Weight for opcode embeddings"),
+                                cl::cat(IR2VecCategory));
+static cl::opt<float> TypeWeight("ir2vec-type-weight", cl::Optional,
+                                 cl::init(0.5),
+                                 cl::desc("Weight for type embeddings"),
+                                 cl::cat(IR2VecCategory));
+static cl::opt<float> ArgWeight("ir2vec-arg-weight", cl::Optional,
+                                cl::init(0.2),
+                                cl::desc("Weight for argument embeddings"),
+                                cl::cat(IR2VecCategory));
+
+// Out-of-line definition of the static Key member of IR2VecVocabAnalysis.
+AnalysisKey IR2VecVocabAnalysis::Key;
+
+//===----------------------------------------------------------------------===//
+// Embedder and its subclasses
+//===----------------------------------------------------------------------===//
+
+// Early-return helper: when CONDITION holds, return lookupVocab(KEY_STR) from
+// the enclosing function. The expansion is an unbraced `if` that already ends
+// in a semicolon, so avoid placing it directly before an `else`.
+#define RETURN_LOOKUP_IF(CONDITION, KEY_STR)                                   \
+  if (CONDITION)                                                               \
+    return lookupVocab(KEY_STR);
+
+/// Construct an embedder for \p F that uses \p Vocabulary and \p Dimension.
+///
+/// The opcode, type and argument weights are copied from the file-scope
+/// -ir2vec-*-weight command-line options. They must be qualified with `::`
+/// here: inside a mem-initializer the unqualified names resolve to the
+/// identically named class members, which would initialize each member from
+/// its own indeterminate value instead of from the option.
+Embedder::Embedder(const Function &F, const Vocab &Vocabulary,
+                   unsigned Dimension)
+    : F(F), Vocabulary(Vocabulary), Dimension(Dimension),
+      OpcWeight(::OpcWeight), TypeWeight(::TypeWeight), ArgWeight(::ArgWeight) {
+}
+
+ErrorOr<std::unique_ptr<Embedder>> Embedder::create(IR2VecKind Mode,
+                                                    const Function &F,
+                                                    const Vocab &Vocabulary,
+                                                    unsigned Dimension) {
+  switch (Mode) {
+  case IR2VecKind::Symbolic:
+    return std::make_unique<SymbolicEmbedder>(F, Vocabulary, Dimension);
+  default:
+    return errc::invalid_argument;
----------------
snehasish wrote:

maybe `return make_error<StringError>("Unknown IR2VecKind")` so that you get a nice error message.

https://github.com/llvm/llvm-project/pull/134004


More information about the llvm-commits mailing list