[llvm] 5155dff - [IRSim] Adding basic implementation of llvm-sim.

Andrew Litteken via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 20 14:48:22 PDT 2021


Author: Andrew Litteken
Date: 2021-03-20T16:47:50-05:00
New Revision: 5155dff2784a47583d432d796b7cf47a0bed9f20

URL: https://github.com/llvm/llvm-project/commit/5155dff2784a47583d432d796b7cf47a0bed9f20
DIFF: https://github.com/llvm/llvm-project/commit/5155dff2784a47583d432d796b7cf47a0bed9f20.diff

LOG: [IRSim] Adding basic implementation of llvm-sim.

This is a similarity visualization tool that accepts a Module and
passes it to the IRSimilarityIdentifier.  The resulting SimilarityGroups
are output in a JSON file.

Tests are found in test/tools/llvm-sim and cover a missing input file,
a malformed module, and correct creation of the JSON output.

Reviewers: paquette, jroelofs, MaskRay

Recommit of: 15645d044bcfe2a0f63156048b302f997a717688 to fix linking
errors.

Differential Revision: https://reviews.llvm.org/D86974

Added: 
    llvm/test/tools/llvm-sim/Inputs/sim1.ll
    llvm/test/tools/llvm-sim/fail-cases.test
    llvm/test/tools/llvm-sim/single-sim-file.test
    llvm/test/tools/llvm-sim/single-sim.test
    llvm/tools/llvm-sim/CMakeLists.txt
    llvm/tools/llvm-sim/llvm-sim.cpp

Modified: 
    llvm/test/CMakeLists.txt
    llvm/test/lit.cfg.py

Removed: 
    


################################################################################
diff  --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 7c4fa2e9033a..0c72adca931b 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -109,6 +109,7 @@ set(LLVM_TEST_DEPENDS
           llvm-readelf
           llvm-reduce
           llvm-rtdyld
+          llvm-sim
           llvm-size
           llvm-split
           llvm-strings

diff  --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 2a1ccc2dcfbd..244d69e01cfc 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -162,7 +162,7 @@ def get_asan_rtlib():
     'llvm-link', 'llvm-lto', 'llvm-lto2', 'llvm-mc', 'llvm-mca',
     'llvm-modextract', 'llvm-nm', 'llvm-objcopy', 'llvm-objdump',
     'llvm-pdbutil', 'llvm-profdata', 'llvm-ranlib', 'llvm-rc', 'llvm-readelf',
-    'llvm-readobj', 'llvm-rtdyld', 'llvm-size', 'llvm-split', 'llvm-strings',
+    'llvm-readobj', 'llvm-rtdyld', 'llvm-sim', 'llvm-size', 'llvm-split', 'llvm-strings',
     'llvm-strip', 'llvm-tblgen', 'llvm-undname', 'llvm-c-test', 'llvm-cxxfilt',
     'llvm-xray', 'yaml2obj', 'obj2yaml', 'yaml-bench', 'verify-uselistorder',
     'bugpoint', 'llc', 'llvm-symbolizer', 'opt', 'sancov', 'sanstats'])

diff  --git a/llvm/test/tools/llvm-sim/Inputs/sim1.ll b/llvm/test/tools/llvm-sim/Inputs/sim1.ll
new file mode 100644
index 000000000000..facc27d285b0
--- /dev/null
+++ b/llvm/test/tools/llvm-sim/Inputs/sim1.ll
@@ -0,0 +1,27 @@
+define void @similar_func1() {
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @similar_func2() {
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}

diff  --git a/llvm/test/tools/llvm-sim/fail-cases.test b/llvm/test/tools/llvm-sim/fail-cases.test
new file mode 100644
index 000000000000..41e3a5617acb
--- /dev/null
+++ b/llvm/test/tools/llvm-sim/fail-cases.test
@@ -0,0 +1,8 @@
+# RUN: not llvm-sim %s 2>&1 | FileCheck %s
+# RUN: not llvm-sim %s.2 2>&1 | FileCheck %s --check-prefix=EXIST
+
+# File reading error messaging tests.
+
+# CHECK: error: expected top-level entity
+
+# EXIST: error: Could not open input file: No such file or directory

diff  --git a/llvm/test/tools/llvm-sim/single-sim-file.test b/llvm/test/tools/llvm-sim/single-sim-file.test
new file mode 100644
index 000000000000..5e45edf12c2c
--- /dev/null
+++ b/llvm/test/tools/llvm-sim/single-sim-file.test
@@ -0,0 +1,57 @@
+# RUN: llvm-sim -o %t %S/Inputs/sim1.ll
+# RUN: FileCheck %s < %t
+
+# Checking the output of a single module test.
+
+# CHECK: {
+# CHECK-NEXT:  "1": [
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 8,
+# CHECK-NEXT:    "end": 9
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 18,
+# CHECK-NEXT:    "end": 19
+# CHECK-NEXT:   }
+# CHECK-NEXT:  ],
+# CHECK-NEXT:  "2": [
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 7,
+# CHECK-NEXT:    "end": 9
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 17,
+# CHECK-NEXT:    "end": 19
+# CHECK-NEXT:   }
+# CHECK-NEXT:  ],
+# CHECK-NEXT:  "3": [
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 6,
+# CHECK-NEXT:    "end": 9
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 16,
+# CHECK-NEXT:    "end": 19
+# CHECK-NEXT:   }
+# CHECK-NEXT:  ],
+# CHECK-NEXT:  "4": [
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 5,
+# CHECK-NEXT:    "end": 9
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 15,
+# CHECK-NEXT:    "end": 19
+# CHECK-NEXT:   }
+# CHECK-NEXT:  ],
+# CHECK-NEXT:  "5": [
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 4,
+# CHECK-NEXT:    "end": 9
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 14,
+# CHECK-NEXT:    "end": 19
+# CHECK-NEXT:   }
+# CHECK-NEXT:  ]
+# CHECK-NEXT: }

diff  --git a/llvm/test/tools/llvm-sim/single-sim.test b/llvm/test/tools/llvm-sim/single-sim.test
new file mode 100644
index 000000000000..4e04682e294e
--- /dev/null
+++ b/llvm/test/tools/llvm-sim/single-sim.test
@@ -0,0 +1,56 @@
+# RUN: llvm-sim -o - %S/Inputs/sim1.ll | FileCheck %s
+
+# Checking the output of a single module test.
+
+# CHECK: {
+# CHECK-NEXT:  "1": [
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 8,
+# CHECK-NEXT:    "end": 9
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 18,
+# CHECK-NEXT:    "end": 19
+# CHECK-NEXT:   }
+# CHECK-NEXT:  ],
+# CHECK-NEXT:  "2": [
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 7,
+# CHECK-NEXT:    "end": 9
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 17,
+# CHECK-NEXT:    "end": 19
+# CHECK-NEXT:   }
+# CHECK-NEXT:  ],
+# CHECK-NEXT:  "3": [
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 6,
+# CHECK-NEXT:    "end": 9
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 16,
+# CHECK-NEXT:    "end": 19
+# CHECK-NEXT:   }
+# CHECK-NEXT:  ],
+# CHECK-NEXT:  "4": [
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 5,
+# CHECK-NEXT:    "end": 9
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 15,
+# CHECK-NEXT:    "end": 19
+# CHECK-NEXT:   }
+# CHECK-NEXT:  ],
+# CHECK-NEXT:  "5": [
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 4,
+# CHECK-NEXT:    "end": 9
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:    "start": 14,
+# CHECK-NEXT:    "end": 19
+# CHECK-NEXT:   }
+# CHECK-NEXT:  ]
+# CHECK-NEXT: }

diff  --git a/llvm/tools/llvm-sim/CMakeLists.txt b/llvm/tools/llvm-sim/CMakeLists.txt
new file mode 100644
index 000000000000..76299050392a
--- /dev/null
+++ b/llvm/tools/llvm-sim/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(LLVM_LINK_COMPONENTS
+  Core
+  Support
+  Analysis
+  IRReader)
+
+add_llvm_tool(llvm-sim
+  llvm-sim.cpp
+)

diff  --git a/llvm/tools/llvm-sim/llvm-sim.cpp b/llvm/tools/llvm-sim/llvm-sim.cpp
new file mode 100644
index 000000000000..26e370ff30f1
--- /dev/null
+++ b/llvm/tools/llvm-sim/llvm-sim.cpp
@@ -0,0 +1,149 @@
+//===-- llvm-sim.cpp - Find  similar sections of programs -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This program finds similar sections of a Module, and exports them as a JSON
+// file.
+//
+// To find similarities contained across multiple modules, please use llvm-link
+// first to merge the modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IRSimilarityIdentifier.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+using namespace llvm;
+using namespace IRSimilarity;
+
+static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
+                                           cl::init("-"),
+                                           cl::value_desc("filename"));
+
+static cl::opt<std::string> InputSourceFile(cl::Positional,
+                                            cl::desc("<Source file>"),
+                                            cl::init("-"),
+                                            cl::value_desc("filename"));
+
+/// Look up the number recorded for \p I in \p LLVMInstNum.
+///
+/// \param I - The Instruction to look up; must not be null (asserted).
+/// \param LLVMInstNum - The mapping of Instructions to their location in the
+/// module represented by an unsigned integer.
+/// \returns The number mapped to \p I, or None if \p I is not in the map.
+Optional<unsigned>
+getPositionInModule(const Instruction *I,
+                    const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
+  assert(I && "Instruction is nullptr!");
+  DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I);
+  if (It == LLVMInstNum.end())
+    return None;
+  return It->second;
+}
+
+/// Writes the given SimilarityGroups as a JSON object to \p FilePath.
+///
+/// \param FilePath - The path to the output location.
+/// \param SimSections - The similarity groups to process.
+/// \param LLVMInstNum - The mapping of Instructions to their location in the
+/// module represented by an unsigned integer.
+/// \returns A nonzero error code if there was a failure creating the file.
+std::error_code
+exportToFile(const StringRef FilePath,
+             const SimilarityGroupList &SimSections,
+             const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
+  std::error_code EC;
+  std::unique_ptr<ToolOutputFile> Out(
+      new ToolOutputFile(FilePath, EC, sys::fs::OF_None));
+  if (EC)
+    return EC;
+
+  json::OStream J(Out->os(), 1);
+  J.objectBegin();
+
+  unsigned SimOption = 1;
+  // Each SimilarityGroup becomes one attribute, keyed by a 1-based counter.
+  for (const SimilarityGroup &G : SimSections) {
+    std::string SimOptionStr = std::to_string(SimOption);
+    J.attributeBegin(SimOptionStr);
+    J.arrayBegin();
+    // Each candidate in the group is emitted as an object holding the
+    // numbers of its first ("start") and last ("end") instructions.
+    for (const IRSimilarityCandidate &C : G) {
+      Optional<unsigned> Start =
+          getPositionInModule((*C.front()).Inst, LLVMInstNum);
+      Optional<unsigned> End =
+          getPositionInModule((*C.back()).Inst, LLVMInstNum);
+
+      assert(Start.hasValue() &&
+             "Could not find instruction number for first instruction");
+      assert(End.hasValue() &&
+             "Could not find instruction number for last instruction");
+
+      J.object([&] {
+        J.attribute("start", Start.getValue());
+        J.attribute("end", End.getValue());
+      });
+    }
+    J.arrayEnd();
+    J.attributeEnd();
+    SimOption++;
+  }
+  J.objectEnd();
+
+  Out->keep();
+
+  return EC;
+}
+
+int main(int argc, const char *argv[]) {
+  InitLLVM X(argc, argv);
+
+  cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
+
+  LLVMContext CurrContext;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> ModuleToAnalyze =
+      parseIRFile(InputSourceFile, Err, CurrContext);
+
+  if (!ModuleToAnalyze) {
+    Err.print(argv[0], errs());
+    return 1;
+  }
+
+  // Mapping from an Instruction pointer to its occurrence in a sequential
+  // list of all the Instructions in a Module.
+  DenseMap<Instruction *, unsigned> LLVMInstNum;
+
+  // Number each instruction visited via instructionsWithoutDebug(), counting
+  // from 1; these are the start and end values reported for each candidate.
+  unsigned InstructionNumber = 1;
+  for (Function &F : *ModuleToAnalyze)
+    for (BasicBlock &BB : F)
+      for (Instruction &I : BB.instructionsWithoutDebug())
+        LLVMInstNum[&I]= InstructionNumber++;
+
+  // Run the similarity identifier over the module to find similar sections.
+  IRSimilarityIdentifier SimIdent;
+  SimilarityGroupList SimilaritySections =
+      SimIdent.findSimilarity(*ModuleToAnalyze);
+
+  std::error_code E =
+      exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
+  if (E) {
+    errs() << argv[0] << ": " << E.message() << '\n';
+    return 2;
+  }
+
+  return 0;
+}


        


More information about the llvm-commits mailing list