[llvm] [llvm-remarkutil] Add an instruction-mix tool (PR #140598)

Jon Roelofs via llvm-commits llvm-commits at lists.llvm.org
Mon May 19 12:26:02 PDT 2025


https://github.com/jroelofs created https://github.com/llvm/llvm-project/pull/140598

The new tool constructs a histogram of instruction frequencies, optionally filtered by function name via a regex.  It can print the histogram either as a human-readable table or as machine-readable CSV.

>From e0dce31ff74e3265cb8ea12a71ecbdbce72357f5 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Mon, 19 May 2025 12:18:12 -0700
Subject: [PATCH] [llvm-remarkutil] Add an instruction-mix tool

The new tool constructs a histogram of instruction frequencies, optionally
filtered by function name via a regex.  It can display in either a
human-readable table format, or machine-readable CSV.
---
 .../Inputs/instruction-mix.yaml               |  27 ++++
 .../llvm-remarkutil/broken-yaml-remark.test   |   1 +
 .../tools/llvm-remarkutil/empty-file.test     |   5 +
 .../llvm-remarkutil/instruction-mix.test      |  22 +++
 llvm/tools/llvm-remarkutil/CMakeLists.txt     |   1 +
 .../llvm-remarkutil/RemarkInstructionMix.cpp  | 125 ++++++++++++++++++
 6 files changed, 181 insertions(+)
 create mode 100644 llvm/test/tools/llvm-remarkutil/Inputs/instruction-mix.yaml
 create mode 100644 llvm/test/tools/llvm-remarkutil/instruction-mix.test
 create mode 100644 llvm/tools/llvm-remarkutil/RemarkInstructionMix.cpp

diff --git a/llvm/test/tools/llvm-remarkutil/Inputs/instruction-mix.yaml b/llvm/test/tools/llvm-remarkutil/Inputs/instruction-mix.yaml
new file mode 100644
index 0000000000000..f798454d317fa
--- /dev/null
+++ b/llvm/test/tools/llvm-remarkutil/Inputs/instruction-mix.yaml
@@ -0,0 +1,27 @@
+--- !Analysis
+Pass:            asm-printer
+Name:            InstructionMix
+Function:        home
+Args:
+  - INST_nop: '1'
+  - INST_add: '3'
+  - INST_mul: '5'
+...
+--- !Analysis
+Pass:            asm-printer
+Name:            InstructionMix
+Function:        homeowner
+Args:
+  - INST_nop: '2'
+  - INST_add: '4'
+  - INST_mul: '6'
+...
+--- !Analysis
+Pass:            asm-printer
+Name:            InstructionMix
+Function:        meow
+Args:
+  - INST_nop: '7'
+  - INST_add: '8'
+  - INST_mul: '9'
+...
diff --git a/llvm/test/tools/llvm-remarkutil/broken-yaml-remark.test b/llvm/test/tools/llvm-remarkutil/broken-yaml-remark.test
index 0f06506603363..464d0b80c4ad0 100644
--- a/llvm/test/tools/llvm-remarkutil/broken-yaml-remark.test
+++ b/llvm/test/tools/llvm-remarkutil/broken-yaml-remark.test
@@ -1,5 +1,6 @@
 RUN: not llvm-remarkutil yaml2bitstream %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s
 RUN: not llvm-remarkutil instruction-count --parser=yaml %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s
+RUN: not llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s
 RUN: not llvm-remarkutil annotation-count --parser=yaml --annotation-type=remark %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s
 RUN: not llvm-remarkutil count --parser=yaml %p/Inputs/broken-remark -o - 2>&1 | FileCheck %s
 
diff --git a/llvm/test/tools/llvm-remarkutil/empty-file.test b/llvm/test/tools/llvm-remarkutil/empty-file.test
index abbf8e02cfa30..bdc5fcf87f7bf 100644
--- a/llvm/test/tools/llvm-remarkutil/empty-file.test
+++ b/llvm/test/tools/llvm-remarkutil/empty-file.test
@@ -1,9 +1,11 @@
 RUN: not llvm-remarkutil yaml2bitstream %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --check-prefix=YAMLPARSER
 RUN: not llvm-remarkutil instruction-count --parser=yaml %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --check-prefix=YAMLPARSER
+RUN: not llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --check-prefix=YAMLPARSER
 RUN: not llvm-remarkutil annotation-count --parser=yaml --annotation-type=remark %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --check-prefix=YAMLPARSER
 RUN: not llvm-remarkutil count --parser=yaml %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --check-prefix=YAMLPARSER
 RUN: llvm-remarkutil bitstream2yaml %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=BITSTREAM2YAML
 RUN: llvm-remarkutil instruction-count --parser=bitstream %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=SIZEBITSTREAM
+RUN: llvm-remarkutil instruction-mix --parser=bitstream %p/Inputs/empty-file --report_style=csv -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=MIXBITSTREAM
 RUN: llvm-remarkutil annotation-count --parser=bitstream --annotation-type=remark %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=ANNOTATIONBITSTREAM
 RUN: llvm-remarkutil count --parser=bitstream %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=COUNTBITSTREAM
 
@@ -20,3 +22,6 @@ RUN: llvm-remarkutil count --parser=bitstream %p/Inputs/empty-file -o - 2>&1 | F
 
 ; COUNTBITSTREAM-LABEL: Source,Count
 ; COUNTBITSTREAM-EMPTY:
+
+; MIXBITSTREAM-LABEL: Instruction,Count
+; MIXBITSTREAM-EMPTY:
diff --git a/llvm/test/tools/llvm-remarkutil/instruction-mix.test b/llvm/test/tools/llvm-remarkutil/instruction-mix.test
new file mode 100644
index 0000000000000..8abb760ca37f9
--- /dev/null
+++ b/llvm/test/tools/llvm-remarkutil/instruction-mix.test
@@ -0,0 +1,22 @@
+RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml | FileCheck %s
+RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-mix.yaml | llvm-remarkutil instruction-mix --parser=bitstream | FileCheck %s
+RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml --report_style=human | FileCheck %s
+RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml --report_style=csv | FileCheck %s --check-prefix=CSV
+RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml --filter=meow | FileCheck %s --check-prefix=MEOW
+
+; CHECK-LABEL: Instruction Count
+; CHECK-NEXT:  ----------- -----
+; CHECK-NEXT:  mul         20
+; CHECK-NEXT:  add         15
+; CHECK-NEXT:  nop         10
+
+; CSV-LABEL: Instruction,Count
+; CSV-NEXT: mul,20
+; CSV-NEXT: add,15
+; CSV-NEXT: nop,10
+
+; MEOW:      Instruction Count
+; MEOW-NEXT: ----------- -----
+; MEOW-NEXT: mul         15
+; MEOW-NEXT: add         12
+; MEOW-NEXT: nop         9
\ No newline at end of file
diff --git a/llvm/tools/llvm-remarkutil/CMakeLists.txt b/llvm/tools/llvm-remarkutil/CMakeLists.txt
index 48aeb9397cda1..ed398ad272024 100644
--- a/llvm/tools/llvm-remarkutil/CMakeLists.txt
+++ b/llvm/tools/llvm-remarkutil/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_tool(llvm-remarkutil
   RemarkConvert.cpp
   RemarkCount.cpp
   RemarkCounter.cpp
+  RemarkInstructionMix.cpp
   RemarkSizeDiff.cpp
   RemarkUtil.cpp
   RemarkUtilHelpers.cpp
diff --git a/llvm/tools/llvm-remarkutil/RemarkInstructionMix.cpp b/llvm/tools/llvm-remarkutil/RemarkInstructionMix.cpp
new file mode 100644
index 0000000000000..e4373640a75ee
--- /dev/null
+++ b/llvm/tools/llvm-remarkutil/RemarkInstructionMix.cpp
@@ -0,0 +1,125 @@
+
+#include "RemarkUtilHelpers.h"
+#include "RemarkUtilRegistry.h"
+
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Regex.h"
+
+#include <numeric>
+
+using namespace llvm;
+using namespace remarks;
+using namespace llvm::remarkutil;
+
+namespace instructionmix {
+// Subcommand that the `filter` and `report_style` options below attach to.
+static cl::SubCommand
+    InstructionMix("instruction-mix",
+                   "Instruction Mix (requires asm-printer remarks)");
+// Regex matched against each remark's function name; ".*" by default.
+static cl::opt<std::string>
+    FunctionFilter("filter", cl::sub(InstructionMix), cl::init(".*"),
+                   cl::value_desc("filter_regex"),
+                   cl::desc("regex to filter functions with"));
+// Report formats selectable via --report_style; the default is `human`.
+enum ReportStyleOptions { human_output, csv_output };
+static cl::opt<ReportStyleOptions> ReportStyle(
+    "report_style", cl::sub(InstructionMix),
+    cl::init(ReportStyleOptions::human_output),
+    cl::desc("Choose the report output format:"),
+    cl::values(clEnumValN(human_output, "human", "Human-readable format"),
+               clEnumValN(csv_output, "csv", "CSV format")));
+// Shared options; presumably the source of InputFileName/OutputFileName etc.
+INPUT_FORMAT_COMMAND_LINE_OPTIONS(InstructionMix)
+INPUT_OUTPUT_COMMAND_LINE_OPTIONS(InstructionMix)
+DEBUG_LOC_INFO_COMMAND_LINE_OPTIONS(InstructionMix)
+
+/// Runs `instruction-mix`: totals the INST_* arguments of "InstructionMix"
+/// remarks whose function name matches --filter, then prints the histogram.
+/// Fails on a bad filter regex, a non-numeric count, or a remark parser error.
+static Error tryInstructionMix() {
+  auto MaybeOF =
+      getOutputFileWithFlags(OutputFileName, sys::fs::OF_TextWithCRLF);
+  if (!MaybeOF)
+    return MaybeOF.takeError();
+  auto OF = std::move(*MaybeOF);
+  auto MaybeBuf = getInputMemoryBuffer(InputFileName);
+  if (!MaybeBuf)
+    return MaybeBuf.takeError();
+  auto MaybeParser = createRemarkParser(InputFormat, (*MaybeBuf)->getBuffer());
+  if (!MaybeParser)
+    return MaybeParser.takeError();
+  Regex Filter(FunctionFilter); // An invalid regex would match nothing.
+  std::string RegexError;
+  if (!Filter.isValid(RegexError))
+    return createStringError(std::errc::invalid_argument,
+                             "invalid filter regex: %s", RegexError.c_str());
+
+  // Collect the histogram of instruction counts.
+  std::unordered_map<std::string, unsigned> Histogram;
+  auto MaybeRemark = (*MaybeParser)->next();
+  for (; MaybeRemark; MaybeRemark = (*MaybeParser)->next()) {
+    auto &Remark = **MaybeRemark;
+    if (Remark.RemarkName != "InstructionMix" ||
+        !Filter.match(Remark.FunctionName))
+      continue;
+    for (auto &Arg : Remark.Args) {
+      StringRef Key = Arg.Key;
+      if (!Key.consume_front("INST_"))
+        continue;
+      unsigned Val = 0;
+      if (Arg.Val.getAsInteger(10, Val)) // True means the parse failed.
+        return createStringError(std::errc::invalid_argument,
+                                 "invalid count for '%s'", Key.str().c_str());
+      Histogram[std::string(Key)] += Val;
+    }
+  }
+  // Only end-of-file is a clean stop; fail before printing partial results.
+  auto E = MaybeRemark.takeError();
+  if (!E.isA<EndOfFileError>())
+    return E;
+  consumeError(std::move(E));
+  // Sort by descending count; break ties by name for a deterministic order.
+  using MixEntry = std::pair<std::string, unsigned>;
+  llvm::SmallVector<MixEntry> Mix(Histogram.begin(), Histogram.end());
+  std::sort(Mix.begin(), Mix.end(), [](const auto &LHS, const auto &RHS) {
+    return LHS.second != RHS.second ? LHS.second > RHS.second
+                                    : LHS.first < RHS.first;
+  });
+
+  // Print the results.
+  switch (ReportStyle) {
+  case human_output: {
+    formatted_raw_ostream FOS(OF->os());
+    size_t MaxMnemonic = StringRef("Instruction").size();
+    unsigned MaxValue = 0;
+    for (const auto &[Inst, Count] : Mix) {
+      MaxMnemonic = std::max(MaxMnemonic, Inst.size());
+      MaxValue = std::max(MaxValue, Count);
+    }
+    unsigned ValueWidth = std::to_string(MaxValue).size(); // 1 if Mix is empty
+    FOS << "Instruction";
+    FOS.PadToColumn(MaxMnemonic + 1) << "Count\n";
+    FOS << "-----------";
+    FOS.PadToColumn(MaxMnemonic + 1) << "-----\n";
+    for (const auto &[Inst, Count] : Mix) {
+      FOS << Inst;
+      FOS.PadToColumn(MaxMnemonic + 1)
+          << " " << format_decimal(Count, ValueWidth) << "\n";
+    }
+  } break;
+  case csv_output: {
+    OF->os() << "Instruction,Count\n";
+    for (const auto &[Inst, Count] : Mix)
+      OF->os() << Inst << "," << Count << "\n";
+  } break;
+  }
+  OF->keep();
+  return Error::success();
+}
+
+static CommandRegistration InstructionMixReg(&InstructionMix,
+                                             tryInstructionMix);
+
+} // namespace instructionmix
\ No newline at end of file



More information about the llvm-commits mailing list