[llvm] [IR2Vec] Initial infrastructure for MIR2Vec (PR #161463)
Mircea Trofin via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 1 17:51:57 PDT 2025
================
@@ -0,0 +1,201 @@
+//===- MIR2VecTest.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIR2Vec.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace mir2vec;
+using VocabMap = std::map<std::string, ir2vec::Embedding>;
+
+namespace {
+
+TEST(MIR2VecTest, RegexExtraction) {
+ // Test simple instruction names
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("NOP"), "NOP");
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("RET"), "RET");
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("ADD16ri"), "ADD");
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("ADD32rr"), "ADD");
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("ADD64rm"), "ADD");
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("MOV8ri"), "MOV");
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("MOV32mr"), "MOV");
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("PUSH64r"), "PUSH");
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("POP64r"), "POP");
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("JMP_4"), "JMP");
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("CALL64pcrel32"), "CALL");
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("SOME_INSTR_123"),
+ "SOME_INSTR");
+ EXPECT_EQ(MIRVocabulary::extractBaseOpcodeName("123ADD"), "ADD");
+ EXPECT_FALSE(MIRVocabulary::extractBaseOpcodeName("123").empty());
+}
+
+class MIR2VecVocabTestFixture : public ::testing::Test {
+protected:
+ std::unique_ptr<LLVMContext> Ctx;
+ std::unique_ptr<Module> M;
+ std::unique_ptr<TargetMachine> TM;
+ const TargetInstrInfo *TII;
+
+ void SetUp() override {
+ LLVMInitializeX86TargetInfo();
+ LLVMInitializeX86Target();
+ LLVMInitializeX86TargetMC();
+
+ Ctx = std::make_unique<LLVMContext>();
+ M = std::make_unique<Module>("test", *Ctx);
+
+ // Set up X86 target
+ Triple TargetTriple("x86_64-unknown-linux-gnu");
+ M->setTargetTriple(TargetTriple);
+
+ std::string Error;
+ const Target *TheTarget =
+ TargetRegistry::lookupTarget(M->getTargetTriple(), Error);
+ ASSERT_TRUE(TheTarget) << "Failed to lookup target: " << Error;
+
+ TargetOptions Options;
+ TM = std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
+ M->getTargetTriple(), "", "", Options, Reloc::Model::Static));
+ ASSERT_TRUE(TM);
+
+ // Create a dummy function to get subtarget info
+ FunctionType *FT = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *F =
+ Function::Create(FT, Function::ExternalLinkage, "test", M.get());
+
+ // Get the target instruction info
+ TII = TM->getSubtargetImpl(*F)->getInstrInfo();
+ ASSERT_TRUE(TII);
+ }
+
+ void TearDown() override {
+ TM.reset();
+ M.reset();
+ Ctx.reset();
+ }
+};
+
+TEST_F(MIR2VecVocabTestFixture, CanonicalOpcodeMappingTest) {
+ // Test that same base opcodes get same canonical indices
+ std::string baseName1 = MIRVocabulary::extractBaseOpcodeName("ADD16ri");
----------------
mtrofin wrote:
Naming nit: local variable names should start with a capital letter, i.e. `BaseName1` rather than `baseName1`.
The same applies in a few places further down.
https://github.com/llvm/llvm-project/pull/161463
More information about the llvm-commits mailing list