[llvm] Introduce llvmremark util diff command (PR #85007)

Zain Jaffal via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 12 17:59:15 PDT 2024


https://github.com/zjaffal created https://github.com/llvm/llvm-project/pull/85007

This is a generic tool that compares two remark files and summarises the differences between the two remark versions.

The tool organises the remarks by debug location and displays the differences in arguments and remark type for each pair of remarks that share the same header, i.e. remark name, function name and pass name.

This is useful in cases where a change in remark type or in remark arguments indicates a regression, as with the vectorizer remarks.

>From ec9e27d477b16ad83e72e188befc12409891124e Mon Sep 17 00:00:00 2001
From: Zain Jaffal <z_jaffal at apple.com>
Date: Fri, 29 Sep 2023 17:40:37 +0100
Subject: [PATCH 1/2] [RemarkUtil] Introduce Remark diff

---
 .../llvm-remarkutil/diff/1-loc-2-args.test    |  15 +
 .../diff/Inputs/1-loc-2-args-2.yaml           |   8 +
 .../diff/Inputs/1-loc-2-args-3.yaml           |   8 +
 .../diff/Inputs/1-loc-2-args.yaml             |   8 +
 .../diff/Inputs/empty-file.yaml               |   0
 .../diff/Inputs/remark-no-debug-loc.yaml      |   0
 .../diff/Inputs/remarks-missed.yaml           |   8 +
 .../diff/Inputs/remarks-passed.yaml           |   8 +
 .../diff/disjoined-remarks.test               |  26 +
 .../diff/remark-type-diff.test                |  16 +
 llvm/tools/llvm-remarkutil/CMakeLists.txt     |   1 +
 llvm/tools/llvm-remarkutil/RemarkCounter.cpp  |  68 +--
 llvm/tools/llvm-remarkutil/RemarkCounter.h    |  60 ---
 llvm/tools/llvm-remarkutil/RemarkDiff.cpp     | 445 ++++++++++++++++++
 llvm/tools/llvm-remarkutil/RemarkDiff.h       | 298 ++++++++++++
 .../llvm-remarkutil/RemarkUtilHelpers.cpp     |  64 +++
 .../tools/llvm-remarkutil/RemarkUtilHelpers.h | 112 +++++
 17 files changed, 1018 insertions(+), 127 deletions(-)
 create mode 100644 llvm/test/tools/llvm-remarkutil/diff/1-loc-2-args.test
 create mode 100644 llvm/test/tools/llvm-remarkutil/diff/Inputs/1-loc-2-args-2.yaml
 create mode 100644 llvm/test/tools/llvm-remarkutil/diff/Inputs/1-loc-2-args-3.yaml
 create mode 100644 llvm/test/tools/llvm-remarkutil/diff/Inputs/1-loc-2-args.yaml
 create mode 100644 llvm/test/tools/llvm-remarkutil/diff/Inputs/empty-file.yaml
 create mode 100644 llvm/test/tools/llvm-remarkutil/diff/Inputs/remark-no-debug-loc.yaml
 create mode 100644 llvm/test/tools/llvm-remarkutil/diff/Inputs/remarks-missed.yaml
 create mode 100644 llvm/test/tools/llvm-remarkutil/diff/Inputs/remarks-passed.yaml
 create mode 100644 llvm/test/tools/llvm-remarkutil/diff/disjoined-remarks.test
 create mode 100644 llvm/test/tools/llvm-remarkutil/diff/remark-type-diff.test
 create mode 100644 llvm/tools/llvm-remarkutil/RemarkDiff.cpp
 create mode 100644 llvm/tools/llvm-remarkutil/RemarkDiff.h

diff --git a/llvm/test/tools/llvm-remarkutil/diff/1-loc-2-args.test b/llvm/test/tools/llvm-remarkutil/diff/1-loc-2-args.test
new file mode 100644
index 00000000000000..b81e7ffed65758
--- /dev/null
+++ b/llvm/test/tools/llvm-remarkutil/diff/1-loc-2-args.test
@@ -0,0 +1,15 @@
+RUN: llvm-remarkutil diff %p/Inputs/1-loc-2-args.yaml %p/Inputs/1-loc-2-args-2.yaml --parser=yaml | FileCheck %s 
+
+; CHECK-LABEL: File A: {{.*}}/Inputs/1-loc-2-args.yaml
+; CHECK: File B: {{.*}}/Inputs/1-loc-2-args-2.yaml
+; CHECK: path/to/anno.c:func0  Ln: 1 Col: 2
+; CHECK: --- Has the same header ---
+; CHECK: Name: RemarkName
+; CHECK: FunctionName: func0
+; CHECK: PassName: Pass
+; CHECK: Only at A >>>>
+; CHECK: String: 1
+; CHECK: =====
+; CHECK: Only at B <<<<
+; CHECK: String: 2
+; CHECK: =====
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-remarkutil/diff/Inputs/1-loc-2-args-2.yaml b/llvm/test/tools/llvm-remarkutil/diff/Inputs/1-loc-2-args-2.yaml
new file mode 100644
index 00000000000000..dc90c257a51e14
--- /dev/null
+++ b/llvm/test/tools/llvm-remarkutil/diff/Inputs/1-loc-2-args-2.yaml
@@ -0,0 +1,8 @@
+--- !Analysis
+Pass:            Pass 
+Name:            RemarkName 
+DebugLoc:        { File: path/to/anno.c, Line: 1, Column: 2 }
+Function:        func0
+Args:
+  - String:   '2'
+  - String:          'Info'
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-remarkutil/diff/Inputs/1-loc-2-args-3.yaml b/llvm/test/tools/llvm-remarkutil/diff/Inputs/1-loc-2-args-3.yaml
new file mode 100644
index 00000000000000..e737623927fa8d
--- /dev/null
+++ b/llvm/test/tools/llvm-remarkutil/diff/Inputs/1-loc-2-args-3.yaml
@@ -0,0 +1,8 @@
+--- !Analysis
+Pass:            Pass 
+Name:            RemarkName 
+DebugLoc:        { File: path/to/anno2.c, Line: 1, Column: 2 }
+Function:        func0
+Args:
+  - String:   '2'
+  - String:          'Info'
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-remarkutil/diff/Inputs/1-loc-2-args.yaml b/llvm/test/tools/llvm-remarkutil/diff/Inputs/1-loc-2-args.yaml
new file mode 100644
index 00000000000000..23b618289a7c5a
--- /dev/null
+++ b/llvm/test/tools/llvm-remarkutil/diff/Inputs/1-loc-2-args.yaml
@@ -0,0 +1,8 @@
+--- !Analysis
+Pass:            Pass 
+Name:            RemarkName 
+DebugLoc:        { File: path/to/anno.c, Line: 1, Column: 2 }
+Function:        func0
+Args:
+  - String:   '1'
+  - String:          'Info'
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-remarkutil/diff/Inputs/empty-file.yaml b/llvm/test/tools/llvm-remarkutil/diff/Inputs/empty-file.yaml
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/llvm/test/tools/llvm-remarkutil/diff/Inputs/remark-no-debug-loc.yaml b/llvm/test/tools/llvm-remarkutil/diff/Inputs/remark-no-debug-loc.yaml
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/llvm/test/tools/llvm-remarkutil/diff/Inputs/remarks-missed.yaml b/llvm/test/tools/llvm-remarkutil/diff/Inputs/remarks-missed.yaml
new file mode 100644
index 00000000000000..b7f5d5870ec6d2
--- /dev/null
+++ b/llvm/test/tools/llvm-remarkutil/diff/Inputs/remarks-missed.yaml
@@ -0,0 +1,8 @@
+--- !Missed
+Pass:            Pass 
+Name:            RemarkName 
+DebugLoc:        { File: path/to/anno.c, Line: 1, Column: 2 }
+Function:        func0
+Args:
+  - String:   '2'
+  - String:          'Info2'
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-remarkutil/diff/Inputs/remarks-passed.yaml b/llvm/test/tools/llvm-remarkutil/diff/Inputs/remarks-passed.yaml
new file mode 100644
index 00000000000000..818e97f3290ce8
--- /dev/null
+++ b/llvm/test/tools/llvm-remarkutil/diff/Inputs/remarks-passed.yaml
@@ -0,0 +1,8 @@
+--- !Passed
+Pass:            Pass 
+Name:            RemarkName 
+DebugLoc:        { File: path/to/anno.c, Line: 1, Column: 2 }
+Function:        func0
+Args:
+  - String:   '2'
+  - String:          'Info2'
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-remarkutil/diff/disjoined-remarks.test b/llvm/test/tools/llvm-remarkutil/diff/disjoined-remarks.test
new file mode 100644
index 00000000000000..48e55f3eedcad2
--- /dev/null
+++ b/llvm/test/tools/llvm-remarkutil/diff/disjoined-remarks.test
@@ -0,0 +1,26 @@
+RUN: llvm-remarkutil diff %p/Inputs/1-loc-2-args.yaml %p/Inputs/1-loc-2-args-3.yaml --parser=yaml | FileCheck %s 
+
+; CHECK-LABEL: File A: {{.*}}/Inputs/1-loc-2-args.yaml
+; CHECK: File B: {{.*}}/Inputs/1-loc-2-args-3.yaml
+; CHECK: ----------
+; CHECK: path/to/anno.c:func0  Ln: 1 Col: 2
+; CHECK: Only at A >>>>
+; CHECK: Name: RemarkName
+; CHECK: FunctionName: func0
+; CHECK: PassName: Pass
+; CHECK: Type: Analysis
+; CHECK: Args:
+; CHECK: 	String: 1
+; CHECK: 	String: Info
+; CHECK: =====
+; CHECK: ----------
+; CHECK: path/to/anno2.c:func0  Ln: 1 Col: 2
+; CHECK: Only at B <<<<
+; CHECK: Name: RemarkName
+; CHECK: FunctionName: func0
+; CHECK: PassName: Pass
+; CHECK: Type: Analysis
+; CHECK: Args:
+; CHECK: 	String: 2
+; CHECK: 	String: Info
+; CHECK: =====
diff --git a/llvm/test/tools/llvm-remarkutil/diff/remark-type-diff.test b/llvm/test/tools/llvm-remarkutil/diff/remark-type-diff.test
new file mode 100644
index 00000000000000..074d264ad1b01b
--- /dev/null
+++ b/llvm/test/tools/llvm-remarkutil/diff/remark-type-diff.test
@@ -0,0 +1,16 @@
+RUN: llvm-remarkutil diff %p/Inputs/remarks-passed.yaml %p/Inputs/remarks-missed.yaml --parser=yaml | FileCheck %s
+
+;CHECK-LABEL: File A: {{.*}}/Inputs/remarks-passed.yaml
+;CHECK: File B: {{.*}}/Inputs/remarks-missed.yaml
+;CHECK: ----------
+;CHECK: path/to/anno.c:func0  Ln: 1 Col: 2
+;CHECK: --- Has the same header ---
+;CHECK: Name: RemarkName
+;CHECK: FunctionName: func0
+;CHECK: PassName: Pass
+;CHECK: Only at A >>>>
+;CHECK: Type: Passed
+;CHECK: =====
+;CHECK: Only at B <<<<
+;CHECK: Type: Missed
+;CHECK: =====
\ No newline at end of file
diff --git a/llvm/tools/llvm-remarkutil/CMakeLists.txt b/llvm/tools/llvm-remarkutil/CMakeLists.txt
index 48aeb9397cda16..f0dc7ea1b2c8c6 100644
--- a/llvm/tools/llvm-remarkutil/CMakeLists.txt
+++ b/llvm/tools/llvm-remarkutil/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_tool(llvm-remarkutil
   RemarkConvert.cpp
   RemarkCount.cpp
   RemarkCounter.cpp
+  RemarkDiff.cpp
   RemarkSizeDiff.cpp
   RemarkUtil.cpp
   RemarkUtilHelpers.cpp
diff --git a/llvm/tools/llvm-remarkutil/RemarkCounter.cpp b/llvm/tools/llvm-remarkutil/RemarkCounter.cpp
index dc0685f342886a..87692437c29674 100644
--- a/llvm/tools/llvm-remarkutil/RemarkCounter.cpp
+++ b/llvm/tools/llvm-remarkutil/RemarkCounter.cpp
@@ -24,6 +24,7 @@ static cl::SubCommand CountSub("count",
 
 INPUT_FORMAT_COMMAND_LINE_OPTIONS(CountSub)
 INPUT_OUTPUT_COMMAND_LINE_OPTIONS(CountSub)
+FILTER_COMMAND_LINE_OPTIONS(CountSub)
 
 static cl::list<std::string>
     Keys("args", cl::desc("Specify remark argument/s to count by."),
@@ -33,45 +34,6 @@ static cl::list<std::string> RKeys(
     cl::desc(
         "Specify remark argument/s to count (accepts regular expressions)."),
     cl::value_desc("arguments"), cl::sub(CountSub), cl::ValueOptional);
-static cl::opt<std::string>
-    RemarkNameOpt("remark-name",
-                  cl::desc("Optional remark name to filter collection by."),
-                  cl::ValueOptional, cl::sub(CountSub));
-static cl::opt<std::string>
-    PassNameOpt("pass-name", cl::ValueOptional,
-                cl::desc("Optional remark pass name to filter collection by."),
-                cl::sub(CountSub));
-
-static cl::opt<std::string> RemarkFilterArgByOpt(
-    "filter-arg-by", cl::desc("Optional remark arg to filter collection by."),
-    cl::ValueOptional, cl::sub(CountSub));
-static cl::opt<std::string>
-    RemarkNameOptRE("rremark-name",
-                    cl::desc("Optional remark name to filter collection by "
-                             "(accepts regular expressions)."),
-                    cl::ValueOptional, cl::sub(CountSub));
-static cl::opt<std::string>
-    RemarkArgFilterOptRE("rfilter-arg-by",
-                         cl::desc("Optional remark arg to filter collection by "
-                                  "(accepts regular expressions)."),
-                         cl::sub(CountSub), cl::ValueOptional);
-static cl::opt<std::string>
-    PassNameOptRE("rpass-name", cl::ValueOptional,
-                  cl::desc("Optional remark pass name to filter collection "
-                           "by (accepts regular expressions)."),
-                  cl::sub(CountSub));
-static cl::opt<Type> RemarkTypeOpt(
-    "remark-type", cl::desc("Optional remark type to filter collection by."),
-    cl::values(clEnumValN(Type::Unknown, "unknown", "UNKOWN"),
-               clEnumValN(Type::Passed, "passed", "PASSED"),
-               clEnumValN(Type::Missed, "missed", "MISSED"),
-               clEnumValN(Type::Analysis, "analysis", "ANALYSIS"),
-               clEnumValN(Type::AnalysisFPCommute, "analysis-fp-commute",
-                          "ANALYSIS_FP_COMMUTE"),
-               clEnumValN(Type::AnalysisAliasing, "analysis-aliasing",
-                          "ANALYSIS_ALIASING"),
-               clEnumValN(Type::Failure, "failure", "FAILURE")),
-    cl::init(Type::Failure), cl::sub(CountSub));
 static cl::opt<CountBy> CountByOpt(
     "count-by", cl::desc("Specify the property to collect remarks by."),
     cl::values(
@@ -111,34 +73,6 @@ static unsigned getValForKey(StringRef Key, const Remark &Remark) {
   return *RemarkArg->getValAsInt();
 }
 
-Error Filters::regexArgumentsValid() {
-  if (RemarkNameFilter && RemarkNameFilter->IsRegex)
-    if (auto E = checkRegex(RemarkNameFilter->FilterRE))
-      return E;
-  if (PassNameFilter && PassNameFilter->IsRegex)
-    if (auto E = checkRegex(PassNameFilter->FilterRE))
-      return E;
-  if (ArgFilter && ArgFilter->IsRegex)
-    if (auto E = checkRegex(ArgFilter->FilterRE))
-      return E;
-  return Error::success();
-}
-
-bool Filters::filterRemark(const Remark &Remark) {
-  if (RemarkNameFilter && !RemarkNameFilter->match(Remark.RemarkName))
-    return false;
-  if (PassNameFilter && !PassNameFilter->match(Remark.PassName))
-    return false;
-  if (RemarkTypeFilter)
-    return *RemarkTypeFilter == Remark.RemarkType;
-  if (ArgFilter) {
-    if (!any_of(Remark.Args,
-                [this](Argument Arg) { return ArgFilter->match(Arg.Val); }))
-      return false;
-  }
-  return true;
-}
-
 Error ArgumentCounter::getAllMatchingArgumentsInRemark(
     StringRef Buffer, ArrayRef<FilterMatcher> Arguments, Filters &Filter) {
   auto MaybeParser = createRemarkParser(InputFormat, Buffer);
diff --git a/llvm/tools/llvm-remarkutil/RemarkCounter.h b/llvm/tools/llvm-remarkutil/RemarkCounter.h
index 34d5bff7740556..3568ba6b88bb78 100644
--- a/llvm/tools/llvm-remarkutil/RemarkCounter.h
+++ b/llvm/tools/llvm-remarkutil/RemarkCounter.h
@@ -45,66 +45,6 @@ inline std::string groupByToStr(GroupBy GroupBy) {
   }
 }
 
-/// Filter object which can be either a string or a regex to match with the
-/// remark properties.
-struct FilterMatcher {
-  Regex FilterRE;
-  std::string FilterStr;
-  bool IsRegex;
-  FilterMatcher(std::string Filter, bool IsRegex) : IsRegex(IsRegex) {
-    if (IsRegex)
-      FilterRE = Regex(Filter);
-    else
-      FilterStr = Filter;
-  }
-
-  bool match(StringRef StringToMatch) const {
-    if (IsRegex)
-      return FilterRE.match(StringToMatch);
-    return FilterStr == StringToMatch.trim().str();
-  }
-};
-
-/// Filter out remarks based on remark properties based on name, pass name,
-/// argument and type.
-struct Filters {
-  std::optional<FilterMatcher> RemarkNameFilter;
-  std::optional<FilterMatcher> PassNameFilter;
-  std::optional<FilterMatcher> ArgFilter;
-  std::optional<Type> RemarkTypeFilter;
-  /// Returns a filter object if all the arguments provided are valid regex
-  /// types otherwise return an error.
-  static Expected<Filters>
-  createRemarkFilter(std::optional<FilterMatcher> RemarkNameFilter,
-                     std::optional<FilterMatcher> PassNameFilter,
-                     std::optional<FilterMatcher> ArgFilter,
-                     std::optional<Type> RemarkTypeFilter) {
-    Filters Filter;
-    Filter.RemarkNameFilter = std::move(RemarkNameFilter);
-    Filter.PassNameFilter = std::move(PassNameFilter);
-    Filter.ArgFilter = std::move(ArgFilter);
-    Filter.RemarkTypeFilter = std::move(RemarkTypeFilter);
-    if (auto E = Filter.regexArgumentsValid())
-      return std::move(E);
-    return std::move(Filter);
-  }
-  /// Returns true if \p Remark satisfies all the provided filters.
-  bool filterRemark(const Remark &Remark);
-
-private:
-  /// Check if arguments can be parsed as valid regex types.
-  Error regexArgumentsValid();
-};
-
-/// Convert Regex string error to an error object.
-inline Error checkRegex(const Regex &Regex) {
-  std::string Error;
-  if (!Regex.isValid(Error))
-    return createStringError(make_error_code(std::errc::invalid_argument),
-                             Twine("Regex: ", Error));
-  return Error::success();
-}
-
 /// Abstract counter class used to define the general required methods for
 /// counting a remark.
 struct Counter {
diff --git a/llvm/tools/llvm-remarkutil/RemarkDiff.cpp b/llvm/tools/llvm-remarkutil/RemarkDiff.cpp
new file mode 100644
index 00000000000000..d40f63b2df8a98
--- /dev/null
+++ b/llvm/tools/llvm-remarkutil/RemarkDiff.cpp
@@ -0,0 +1,445 @@
+//===-------------- RemarkDiff.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Diffs remarks between two remark files.
+/// The tool offers different modes for comparing two versions of remarks.
+/// 1. Look through common remarks between two files.
+/// 2. Compare the remark type. This is useful to check if an optimization
+/// changed from passing to failing.
+/// 3. Compare remark arguments. This is useful to check if a remark argument
+/// changed after some compiler change.
+///
+/// The results are presented as human-readable text or as a JSON file.
+///
+//===----------------------------------------------------------------------===//
+
+#include "RemarkDiff.h"
+#include "llvm/Support/JSON.h"
+#include <utility>
+
+using namespace llvm;
+using namespace remarks;
+using namespace llvm::remarkutil;
+
+static cl::SubCommand DiffSub("diff",
+                              "diff remarks based on specified properties.");
+static cl::opt<std::string> RemarkFileA(cl::Positional,
+                                        cl::desc("<remarka_file>"),
+                                        cl::Required, cl::sub(DiffSub));
+static cl::opt<std::string> RemarkFileB(cl::Positional,
+                                        cl::desc("<remarkb_file>"),
+                                        cl::Required, cl::sub(DiffSub));
+
+static cl::opt<bool> Verbose(
+    "v", cl::init(false),
+    cl::desc("Output detailed difference for remarks. By default the tool will "
+             "only show the remark name, type and location. If the flag is "
+             "added we display the arguments that are different."),
+    cl::sub(DiffSub));
+static cl::opt<bool>
+    ShowArgDiffOnly("show-arg-diff-only", cl::init(false),
+                    cl::desc("Show only the remarks that have the same header "
+                             "and differ in arguments"),
+                    cl::sub(DiffSub));
+static cl::opt<bool> OnlyShowCommonRemarks(
+    "only-show-common-remarks", cl::init(false),
+    cl::desc("Ignore any remarks that don't exist in both <remarka_file> and "
+             "<remarkb_file>."),
+    cl::sub(DiffSub));
+static cl::opt<bool> ShowOnlyDifferentRemarks(
+    "only-show-different-remarks", cl::init(false),
+    cl::desc("Show remarks that are exclusively at either A or B"),
+    cl::sub(DiffSub));
+static cl::opt<bool> ShowOnlyA("only-show-a", cl::init(false),
+                               cl::desc("Show remarks that are only in A"),
+                               cl::sub(DiffSub));
+
+static cl::opt<bool> ShowOnlyB("only-show-b", cl::init(false),
+                               cl::desc("Show remarks that are only in B"),
+                               cl::sub(DiffSub));
+static cl::opt<bool> ShowRemarkTypeDiffOnly(
+    "show-remark-type-diff-only", cl::init(false),
+    cl::desc("Only show diff if remarks have the same header but different "
+             "type"),
+    cl::sub(DiffSub));
+
+static cl::opt<Format> InputFormat(
+    "parser", cl::desc("Input remark format to parse"),
+    cl::values(clEnumValN(Format::YAML, "yaml", "YAML"),
+               clEnumValN(Format::Bitstream, "bitstream", "Bitstream")),
+    cl::sub(DiffSub));
+static cl::opt<ReportStyleOptions> ReportStyle(
+    "report_style", cl::sub(DiffSub),
+    cl::init(ReportStyleOptions::human_output),
+    cl::desc("Choose the report output format:"),
+    cl::values(clEnumValN(human_output, "human", "Human-readable format"),
+               clEnumValN(json_output, "json", "JSON format")));
+static cl::opt<std::string> OutputFileName("o", cl::init("-"), cl::sub(DiffSub),
+                                           cl::desc("Output"),
+                                           cl::value_desc("file"));
+FILTER_COMMAND_LINE_OPTIONS(DiffSub)
+
+void RemarkArgInfo::print(raw_ostream &OS) const {
+  OS << Key << ": " << Val << "\n";
+}
+
+void RemarkInfo::printHeader(raw_ostream &OS) const {
+  OS << "Name: " << RemarkName << "\n";
+  OS << "FunctionName: " << FunctionName << "\n";
+  OS << "PassName: " << PassName << "\n";
+}
+
+void RemarkInfo::print(raw_ostream &OS) const {
+  printHeader(OS);
+  OS << "Type: " << typeToStr(RemarkType) << "\n";
+  if (!Args.empty()) {
+    OS << "Args:\n";
+    for (auto Arg : Args)
+      OS << "\t" << Arg;
+  }
+}
+
+void DiffAtRemark::print(raw_ostream &OS) const {
+  BaseRemark.printHeader(OS);
+  if (RemarkTypeDiff) {
+    OS << "Only at A >>>>\n";
+    OS << "Type: " << typeToStr(RemarkTypeDiff->first) << "\n";
+    OS << "=====\n";
+    OS << "Only at B <<<<\n";
+    OS << "Type: " << typeToStr(RemarkTypeDiff->second) << "\n";
+    OS << "=====\n";
+  }
+
+  if (!OnlyA.empty()) {
+    OS << "Only at A >>>>\n";
+    unsigned Idx = 0;
+    for (auto &R : OnlyA) {
+      OS << R;
+      if (Idx < OnlyA.size() - 1)
+        OS << "\n";
+      Idx++;
+    }
+    OS << "=====\n";
+  }
+  if (!OnlyB.empty()) {
+    OS << "Only at B <<<<\n";
+    unsigned Idx = 0;
+    for (auto &R : OnlyB) {
+      OS << R;
+      if (Idx < OnlyB.size() - 1)
+        OS << "\n";
+      Idx++;
+    }
+    OS << "=====\n";
+  }
+  if (Verbose)
+    for (auto &R : InBoth)
+      OS << R << "\n";
+}
+
+void DiffAtLoc::print(raw_ostream &OS) {
+  if (!OnlyA.empty()) {
+    OS << "Only at A >>>>\n";
+    unsigned Idx = 0;
+    for (auto &R : OnlyA) {
+      OS << R;
+      if (Idx < OnlyA.size() - 1)
+        OS << "\n";
+      Idx++;
+    }
+    OS << "=====\n";
+  }
+
+  if (!OnlyB.empty()) {
+    OS << "Only at B <<<<\n";
+    unsigned Idx = 0;
+    for (auto &R : OnlyB) {
+      OS << R;
+      if (Idx < OnlyB.size() - 1)
+        OS << "\n";
+      Idx++;
+    }
+    OS << "=====\n";
+  }
+
+  if (!HasTheSameHeader.empty()) {
+    OS << "--- Has the same header ---\n";
+    for (auto &R : HasTheSameHeader)
+      R.print(OS);
+  }
+}
+
+/// \returns a JSON array representation of a vector of remark arguments.
+static json::Array remarkArgsToJson(SmallVectorImpl<RemarkArgInfo> &Args) {
+  json::Array ArgArray;
+  for (auto Arg : Args) {
+    json::Object ArgPair({{Arg.Key, Arg.Val}});
+    ArgArray.push_back(std::move(ArgPair));
+  }
+  return ArgArray;
+}
+
+/// \returns remark representation as a json object.
+static json::Object remarkToJSON(RemarkInfo &Remark) {
+  json::Object RemarkJSON;
+  RemarkJSON["RemarkName"] = Remark.RemarkName;
+  RemarkJSON["FunctionName"] = Remark.FunctionName;
+  RemarkJSON["PassName"] = Remark.PassName;
+  RemarkJSON["RemarkType"] = typeToStr(Remark.RemarkType);
+  if (Verbose)
+    RemarkJSON["Args"] = remarkArgsToJson(Remark.Args);
+  return RemarkJSON;
+}
+
+json::Object DiffAtRemark::toJson() {
+  json::Object Object;
+  Object["FunctionName"] = BaseRemark.FunctionName;
+  Object["PassName"] = BaseRemark.PassName;
+  Object["RemarkName"] = BaseRemark.RemarkName;
+  // display remark type if it is the same between the two remarks.
+  if (!RemarkTypeDiff)
+    Object["RemarkType"] = typeToStr(BaseRemark.RemarkType);
+  json::Array InBothJSON;
+  json::Array OnlyAJson;
+  json::Array OnlyBJson;
+  for (auto Arg : InBoth) {
+    json::Object ArgPair({{Arg.Key, Arg.Val}});
+    InBothJSON.push_back(std::move(ArgPair));
+  }
+  for (auto Arg : OnlyA) {
+    json::Object ArgPair({{Arg.Key, Arg.Val}});
+    OnlyAJson.push_back(std::move(ArgPair));
+  }
+  for (auto Arg : OnlyB) {
+    json::Object ArgPair({{Arg.Key, Arg.Val}});
+    OnlyBJson.push_back(std::move(ArgPair));
+  }
+  json::Object Diff;
+  if (RemarkTypeDiff) {
+    Diff["RemarkTypeA"] = typeToStr(RemarkTypeDiff->first);
+    Diff["RemarkTypeB"] = typeToStr(RemarkTypeDiff->second);
+  }
+
+  // Only display common remark arguments if verbose is passed.
+  if (Verbose)
+    Object["ArgsInBoth"] = remarkArgsToJson(InBoth);
+  if (!OnlyAJson.empty())
+    Diff["ArgsAtA"] = remarkArgsToJson(OnlyA);
+  if (!OnlyBJson.empty())
+    Diff["ArgsAtB"] = remarkArgsToJson(OnlyB);
+  Object["Diff"] = std::move(Diff);
+  return Object;
+}
+json::Object DiffAtLoc::toJson() {
+  json::Object Obj;
+  json::Array DiffObj;
+  json::Array OnlyAObj;
+  json::Array OnlyBObj;
+  json::Array HasSameHeaderObj;
+  for (auto R : OnlyA)
+    OnlyAObj.push_back(remarkToJSON(R));
+  for (auto R : OnlyB)
+    OnlyBObj.push_back(remarkToJSON(R));
+  for (auto R : HasTheSameHeader)
+    HasSameHeaderObj.push_back(R.toJson());
+  if (!OnlyShowCommonRemarks) {
+    Obj["OnlyA"] = std::move(OnlyAObj);
+    Obj["OnlyB"] = std::move(OnlyBObj);
+  }
+  Obj["HasSameHeaderObj"] = std::move(HasSameHeaderObj);
+  return Obj;
+}
+
+/// Parse a remark buffer and generate a set of remarks ordered by the debug
+/// location.
+static Error parseRemarkFile(
+    std::unique_ptr<RemarkParser> &Parser,
+    MapVector<DebugLocation, SmallVector<RemarkInfo, 4>> &DebugLoc2RemarkMap,
+    Filters &Filter) {
+  auto MaybeRemark = Parser->next();
+  // Use a map of sets to remove duplicate entries in the remark file.
+  MapVector<DebugLocation, SmallSet<RemarkInfo, 4>> DebugLoc2RemarkMapSet;
+  for (; MaybeRemark; MaybeRemark = Parser->next()) {
+    auto &Remark = **MaybeRemark;
+    if (!Filter.filterRemark(Remark))
+      continue;
+    std::string SourceFilePath = "";
+    unsigned SourceLine = 0;
+    unsigned SourceColumn = 0;
+    if (Remark.Loc.has_value()) {
+      SourceFilePath = Remark.Loc->SourceFilePath.str();
+      SourceLine = Remark.Loc->SourceLine;
+      SourceColumn = Remark.Loc->SourceColumn;
+    }
+
+    DebugLocation Key(SourceFilePath, Remark.FunctionName, SourceLine,
+                      SourceColumn);
+    auto Iter = DebugLoc2RemarkMapSet.insert({Key, {}});
+    Iter.first->second.insert(Remark);
+  }
+  for (auto [DebugLocation, RemarkSet] : DebugLoc2RemarkMapSet)
+    DebugLoc2RemarkMap[DebugLocation] = {RemarkSet.begin(), RemarkSet.end()};
+
+  auto E = MaybeRemark.takeError();
+  if (!E.isA<remarks::EndOfFileError>())
+    return E;
+  consumeError(std::move(E));
+  return Error::success();
+}
+
+void Diff::computeDiffAtLoc(DebugLocation Loc, ArrayRef<RemarkInfo> RemarksA,
+                            ArrayRef<RemarkInfo> RemarksB) {
+
+  // A set of remarks where either they have a remark at the other file
+  // equaling them or share the same header. This is used to reduce the
+  // duplicates when looking at a location. If a remark has a counterpart in
+  // the other file then we aren't interested if it shares the same header
+  // with another remark.
+  DiffAtLoc DiffLoc(Loc);
+  SmallSet<RemarkInfo, 4> FoundRemarks;
+  SmallVector<std::pair<RemarkInfo, RemarkInfo>, 4> HasSameHeader;
+  // First look through the remarks that are exactly equal in the two files.
+  for (auto &RA : RemarksA)
+    for (auto &RB : RemarksB)
+      if (RA == RB)
+        FoundRemarks.insert(RA);
+  for (auto &RA : RemarksA) {
+    // skip
+    if (FoundRemarks.contains(RA))
+      continue;
+    for (auto &RB : RemarksB) {
+      if (FoundRemarks.contains(RB))
+        continue;
+      if (RA.hasSameHeader(RB)) {
+        HasSameHeader.push_back({RA, RB});
+        FoundRemarks.insert(RA);
+        FoundRemarks.insert(RB);
+      }
+    }
+  }
+
+  for (auto &RA : RemarksA) {
+    if (!FoundRemarks.contains(RA) && DiffConfig.AddRemarksFromA)
+      DiffLoc.OnlyA.push_back(RA);
+  }
+  for (auto &RB : RemarksB) {
+    if (!FoundRemarks.contains(RB) && DiffConfig.AddRemarksFromB)
+      DiffLoc.OnlyB.push_back(RB);
+  }
+  // Discard any shared remarks and only display uniquely different remarks
+  // between A and B.
+  if (!DiffConfig.ShowCommonRemarks) {
+    DiffAtLocs.push_back(DiffLoc);
+    return;
+  }
+
+  // calculate the diff at each shared remark.
+  for (auto &[RA, RB] : HasSameHeader)
+    DiffLoc.HasTheSameHeader.push_back({RA, RB, DiffConfig});
+  DiffAtLocs.push_back(DiffLoc);
+}
+
+void Diff::computeDiff(
+    SetVector<DebugLocation> &DebugLocs,
+    MapVector<DebugLocation, SmallVector<RemarkInfo, 4>> &DebugLoc2RemarkA,
+    MapVector<DebugLocation, SmallVector<RemarkInfo, 4>> &DebugLoc2RemarkB) {
+  // Compute the diff at every debug location seen in file A or file B.
+  for (const DebugLocation &Loc : DebugLocs) {
+    SmallVector<RemarkInfo, 4> RemarksLocAIt = DebugLoc2RemarkA.lookup(Loc);
+    SmallVector<RemarkInfo, 4> RemarksLocBIt = DebugLoc2RemarkB.lookup(Loc);
+    computeDiffAtLoc(Loc, RemarksLocAIt, RemarksLocBIt);
+  }
+}
+
+Error Diff::printDiff(StringRef InputFileNameA, StringRef InputFileNameB) {
+  // Create the output buffer.
+  auto MaybeOF = getOutputFileWithFlags(OutputFileName,
+                                        /*Flags = */ sys::fs::OF_TextWithCRLF);
+  if (!MaybeOF)
+    return MaybeOF.takeError();
+  std::unique_ptr<ToolOutputFile> OF = std::move(*MaybeOF);
+  if (ReportStyle == ReportStyleOptions::human_output) {
+    OF->os() << "File A: " << InputFileNameA << "\n";
+    OF->os() << "File B: " << InputFileNameB << "\n";
+    for (auto LocDiff : DiffAtLocs) {
+      if (LocDiff.isEmpty())
+        continue;
+      OF->os() << "----------\n";
+      OF->os() << LocDiff.Loc.SourceFilePath << ":" << LocDiff.Loc.FunctionName
+               << "  Ln: " << LocDiff.Loc.SourceLine
+               << " Col: " << LocDiff.Loc.SourceColumn << "\n";
+      LocDiff.print(OF->os());
+    }
+  } else {
+
+    json::Object Output;
+    json::Object Files(
+        {{"A", InputFileNameA.str()}, {"B", InputFileNameB.str()}});
+    Output["Files"] = std::move(Files);
+    SmallVector<json::Object> Locs;
+    for (auto LocDiff : DiffAtLocs) {
+      Output[LocDiff.Loc.SourceFilePath] = json::Object(
+          {{LocDiff.Loc.FunctionName,
+            json::Object({{LocDiff.Loc.toString(), LocDiff.toJson()}})}});
+    }
+
+    json::OStream JOS(OF->os(), 2);
+    JOS.value(std::move(Output));
+    OF->os() << '\n';
+  }
+  OF->keep();
+  return Error::success();
+}
+
+static Error createRemarkDiff() {
+  // Get memory buffer for file a and file b.
+  auto RemarkAMaybeBuf = getInputMemoryBuffer(RemarkFileA);
+  if (!RemarkAMaybeBuf)
+    return RemarkAMaybeBuf.takeError();
+  auto RemarkBMaybeBuf = getInputMemoryBuffer(RemarkFileB);
+  if (!RemarkBMaybeBuf)
+    return RemarkBMaybeBuf.takeError();
+  StringRef BufferA = (*RemarkAMaybeBuf)->getBuffer();
+  StringRef BufferB = (*RemarkBMaybeBuf)->getBuffer();
+  // Create parsers for file a and file b remarks.
+  auto MaybeParser1 = createRemarkParserFromMeta(InputFormat, BufferA);
+  if (!MaybeParser1)
+    return MaybeParser1.takeError();
+  auto MaybeParser2 = createRemarkParserFromMeta(InputFormat, BufferB);
+  if (!MaybeParser2)
+    return MaybeParser2.takeError();
+  auto MaybeFilter = getRemarkFilter(
+      RemarkNameOpt, RemarkNameOptRE, PassNameOpt, PassNameOptRE, RemarkTypeOpt,
+      RemarkFilterArgByOpt, RemarkArgFilterOptRE);
+  if (!MaybeFilter)
+    return MaybeFilter.takeError();
+  auto &Filter = *MaybeFilter;
+  // Order the remarks based on their debug location and function name.
+  MapVector<DebugLocation, SmallVector<RemarkInfo, 4>> DebugLoc2RemarkA;
+  MapVector<DebugLocation, SmallVector<RemarkInfo, 4>> DebugLoc2RemarkB;
+  if (auto E = parseRemarkFile(*MaybeParser1, DebugLoc2RemarkA, Filter))
+    return E;
+  if (auto E = parseRemarkFile(*MaybeParser2, DebugLoc2RemarkB, Filter))
+    return E;
+  SetVector<DebugLocation> DebugLocs;
+  for (const auto &[Loc, _] : DebugLoc2RemarkA)
+    DebugLocs.insert(Loc);
+  for (const auto &[Loc, _] : DebugLoc2RemarkB)
+    DebugLocs.insert(Loc);
+  DiffConfigurator DiffConfig(ShowArgDiffOnly, OnlyShowCommonRemarks,
+                              ShowOnlyDifferentRemarks, ShowOnlyA, ShowOnlyB,
+                              ShowRemarkTypeDiffOnly);
+  Diff Diff(Filter, DiffConfig);
+  Diff.computeDiff(DebugLocs, DebugLoc2RemarkA, DebugLoc2RemarkB);
+  if (auto E = Diff.printDiff(RemarkFileA, RemarkFileB))
+    return E;
+  return Error::success();
+}
+
+static CommandRegistration DiffReg(&DiffSub, createRemarkDiff);
\ No newline at end of file
diff --git a/llvm/tools/llvm-remarkutil/RemarkDiff.h b/llvm/tools/llvm-remarkutil/RemarkDiff.h
new file mode 100644
index 00000000000000..46c0d0ba9f5894
--- /dev/null
+++ b/llvm/tools/llvm-remarkutil/RemarkDiff.h
@@ -0,0 +1,298 @@
+//===- RemarkDiff.h -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic tool to diff remarks based on properties
+//
+//===----------------------------------------------------------------------===//
+
+#include "RemarkUtilHelpers.h"
+#include "RemarkUtilRegistry.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/ScopedPrinter.h"
+
+namespace llvm {
+namespace remarks {
+
+enum ReportStyleOptions { human_output, json_output };
+/// copy of Argument class using std::string instead of StringRef.
+struct RemarkArgInfo {
+  std::string Key;
+  std::string Val;
+  RemarkArgInfo(StringRef Key, StringRef Val)
+      : Key(Key.str()), Val(Val.str()) {}
+  void print(raw_ostream &OS) const;
+};
+
+/// Hash a RemarkArgInfo by combining its key and value. Marked inline because
+/// the definition lives in a header; a non-inline definition would be emitted
+/// in every translation unit that includes it.
+inline hash_code hash_value(const RemarkArgInfo &Arg) {
+  return hash_combine(Arg.Key, Arg.Val);
+}
+
+/// A wrapper for Remark class that can be used for generating remark diff.
+/// The name, function, pass and arguments are stored as owned std::string
+/// values instead of StringRefs.
+struct RemarkInfo {
+  std::string RemarkName;
+  std::string FunctionName;
+  std::string PassName;
+  Type RemarkType;
+  SmallVector<RemarkArgInfo, 4> Args;
+  RemarkInfo();
+  /// Build a remark from already-extracted fields. \p Args is copied.
+  RemarkInfo(std::string RemarkName, std::string FunctionName,
+             std::string PassName, Type RemarkType,
+             const SmallVector<RemarkArgInfo, 4> &Args)
+      : RemarkName(RemarkName), FunctionName(FunctionName), PassName(PassName),
+        RemarkType(RemarkType), Args(Args) {}
+  /// Copy the header fields, the type and every argument key/value out of
+  /// \p Remark. The remark is only read, so it is taken by const reference.
+  RemarkInfo(const Remark &Remark)
+      : RemarkName(Remark.RemarkName.str()),
+        FunctionName(Remark.FunctionName.str()),
+        PassName(Remark.PassName.str()), RemarkType(Remark.RemarkType) {
+    Args.reserve(Remark.Args.size());
+    for (const auto &Arg : Remark.Args)
+      Args.emplace_back(Arg.Key, Arg.Val);
+  }
+
+  /// Check if the remark has the same name, function name and pass name as \p
+  /// RHS. The remark type and the arguments are not compared.
+  bool hasSameHeader(const RemarkInfo &RHS) const {
+    return RemarkName == RHS.RemarkName && FunctionName == RHS.FunctionName &&
+           PassName == RHS.PassName;
+  }
+  void print(raw_ostream &OS) const;
+  void printHeader(raw_ostream &OS) const;
+};
+
+inline bool operator<(const RemarkArgInfo &LHS, const RemarkArgInfo &RHS) {
+  return std::make_tuple(LHS.Key, LHS.Val) < std::make_tuple(RHS.Key, RHS.Val);
+}
+
+inline bool operator<(const RemarkInfo &LHS, const RemarkInfo &RHS) {
+  return std::make_tuple(LHS.RemarkType, LHS.PassName, LHS.RemarkName,
+                         LHS.FunctionName, LHS.Args) <
+         std::make_tuple(RHS.RemarkType, RHS.PassName, RHS.RemarkName,
+                         RHS.FunctionName, RHS.Args);
+}
+
+inline bool operator==(const RemarkArgInfo &LHS, const RemarkArgInfo &RHS) {
+  return LHS.Key == RHS.Key && LHS.Val == RHS.Val;
+}
+
+inline bool operator==(const RemarkInfo &LHS, const RemarkInfo &RHS) {
+  return LHS.RemarkName == RHS.RemarkName &&
+         LHS.FunctionName == RHS.FunctionName && LHS.PassName == RHS.PassName &&
+         LHS.RemarkType == RHS.RemarkType && LHS.Args == RHS.Args;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+                               const RemarkArgInfo &RemarkArgInfo) {
+  RemarkArgInfo.print(OS);
+  return OS;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS, const RemarkInfo &RemarkInfo) {
+  RemarkInfo.print(OS);
+  return OS;
+}
+
+/// Represents the unique location where the remark was issued which is based on
+/// the debug information attached to the remark. The debug location consists of
+/// the source file path, function name, line number and column number.
+struct DebugLocation {
+  std::string SourceFilePath;
+  std::string FunctionName;
+  unsigned SourceLine = 0;
+  unsigned SourceColumn = 0;
+  DebugLocation() = default;
+  DebugLocation(StringRef SourceFilePath, StringRef FunctionName,
+                unsigned SourceLine, unsigned SourceColumn)
+      : SourceFilePath(SourceFilePath.str()), FunctionName(FunctionName.str()),
+        SourceLine(SourceLine), SourceColumn(SourceColumn) {}
+  /// Render only the line and column as "Ln: <line> Col: <column>"; the file
+  /// path and function name are not part of the result. Const so it can be
+  /// called on const locations (e.g. map keys).
+  std::string toString() const {
+    return "Ln: " + to_string(SourceLine) + " Col: " + to_string(SourceColumn);
+  }
+};
+
+/// Configure the verbosity of the diff by choosing to only show unique remarks
+/// from each version or only consider remarks if they differ in type or
+/// argument. The configurator handles user specified arguments passed by flags.
+struct DiffConfigurator {
+  bool AddRemarksFromA;
+  bool AddRemarksFromB;
+  bool ShowCommonRemarks;
+  bool ShowRemarkTypeDiff;
+  bool ShowArgTypeDiff;
+  DiffConfigurator(bool ShowArgDiffOnly, bool OnlyShowCommonRemarks,
+                   bool OnlyShowDifferentRemarks, bool ShowOnlyA,
+                   bool ShowOnlyB, bool ShowRemarkTypeDiffOnly) {
+    AddRemarksFromA = !OnlyShowCommonRemarks && (ShowOnlyA || !ShowOnlyB);
+    AddRemarksFromB = !OnlyShowCommonRemarks && (ShowOnlyB || !ShowOnlyA);
+    ShowCommonRemarks = !OnlyShowDifferentRemarks || OnlyShowCommonRemarks;
+    ShowRemarkTypeDiff = !ShowArgDiffOnly || ShowRemarkTypeDiffOnly;
+    ShowArgTypeDiff = !ShowRemarkTypeDiffOnly || ShowArgDiffOnly;
+  }
+};
+
+/// Represent a diff where the remark header information is the same but the
+/// remarks differ in remark type or arguments.
+struct DiffAtRemark {
+  RemarkInfo BaseRemark;
+  std::optional<std::pair<Type, Type>> RemarkTypeDiff;
+  SmallVector<RemarkArgInfo, 4> OnlyA;
+  SmallVector<RemarkArgInfo, 4> OnlyB;
+  SmallVector<RemarkArgInfo, 4> InBoth;
+
+  /// Compute the diff between two remarks \p RA and \p RB which share the same
+  /// header and differ in remark type and arguments. Arguments are compared
+  /// position by position. \p DiffConfig selects whether the argument diff
+  /// and/or the remark type diff is computed.
+  DiffAtRemark(RemarkInfo &RA, RemarkInfo &RB, DiffConfigurator &DiffConfig)
+      : BaseRemark(RA) {
+    if (DiffConfig.ShowArgTypeDiff) {
+      // Compare the arguments that exist in both remarks, index by index.
+      const size_t NumShared = std::min(RA.Args.size(), RB.Args.size());
+      for (size_t ArgIdx = 0; ArgIdx < NumShared; ArgIdx++) {
+        if (RA.Args[ArgIdx] == RB.Args[ArgIdx]) {
+          InBoth.push_back(RA.Args[ArgIdx]);
+        } else {
+          OnlyA.push_back(RA.Args[ArgIdx]);
+          OnlyB.push_back(RB.Args[ArgIdx]);
+        }
+      }
+
+      // The longer argument list contributes its unmatched tail to its own
+      // side. Bind by reference: only the tail is read, so copying the whole
+      // vector would be wasted work.
+      const bool IsARemaining = RA.Args.size() > RB.Args.size();
+      const SmallVector<RemarkArgInfo, 4> &RemainingArgs =
+          IsARemaining ? RA.Args : RB.Args;
+      SmallVector<RemarkArgInfo, 4> &Remaining = IsARemaining ? OnlyA : OnlyB;
+      for (size_t ArgIdx = NumShared; ArgIdx < RemainingArgs.size(); ArgIdx++)
+        Remaining.push_back(RemainingArgs[ArgIdx]);
+    }
+
+    // Compare remark type between RA and RB.
+    if (DiffConfig.ShowRemarkTypeDiff && RA.RemarkType != RB.RemarkType) {
+      RemarkTypeDiff = {RA.RemarkType, RB.RemarkType};
+    }
+  }
+
+  void print(raw_ostream &OS) const;
+
+  /// Represent remark diff as a json object where the header is the same as the
+  /// baseline remark and diff json key represents the differences between the
+  /// two versions of the remark.
+  json::Object toJson();
+};
+
+/// Represents the diff at a debug location. This can be unique remarks that
+/// exist in file a or file b or remarks that share the same header but differ
+/// in remark type or arguments. Any common remarks at the location are
+/// discarded.
+struct DiffAtLoc {
+  DebugLocation Loc;
+  SmallVector<RemarkInfo, 4> OnlyA;
+  SmallVector<RemarkInfo, 4> OnlyB;
+  // List of remarks that are different but share the same header.
+  SmallVector<DiffAtRemark, 4> HasTheSameHeader;
+
+  DiffAtLoc(DebugLocation Loc) : Loc(Loc) {}
+
+  /// Check if the debug location is empty where no unique remarks exist
+  /// in A, B or remarks sharing the same header but differ in type or
+  /// arguments. Const because it only inspects the three lists.
+  bool isEmpty() const {
+    return OnlyA.empty() && OnlyB.empty() && HasTheSameHeader.empty();
+  }
+  void print(raw_ostream &OS);
+
+  /// Display diff as a json object.
+  json::Object toJson();
+};
+
+/// Represent the diff between the two files as a list of diffs at each debug
+/// location found in both remark files. The diff is filtered by user-specified
+/// filters for remark name, pass name, function name and remark type.
+struct Diff {
+  SmallVector<DiffAtLoc, 8> DiffAtLocs;
+  Filters &Filter;
+  DiffConfigurator DiffConfig;
+  Diff(Filters &Filter, DiffConfigurator DiffConfig)
+      : Filter(Filter), DiffConfig(DiffConfig) {}
+  /// Taking all debug locations represented in both files in \p DebugLocs
+  /// calculate the difference between the remarks existing in each location in
+  /// \p DebugLoc2RemarkA and \p DebugLoc2RemarkB.
+  void computeDiff(
+      SetVector<DebugLocation> &DebugLocs,
+      MapVector<DebugLocation, SmallVector<RemarkInfo, 4>> &DebugLoc2RemarkA,
+      MapVector<DebugLocation, SmallVector<RemarkInfo, 4>> &DebugLoc2RemarkB);
+  /// Compute the diff between the remarks at a shared debug location between
+  /// file a and b.
+  void computeDiffAtLoc(DebugLocation Loc, ArrayRef<RemarkInfo> RemarksA,
+                        ArrayRef<RemarkInfo> RemarksB);
+
+  Error printDiff(StringRef InputFileNameA, StringRef InputFileNameB);
+};
+} // namespace remarks
+
+/// DenseMap key traits for remarks::DebugLocation, hashing and comparing on
+/// the file path, function name, line and column.
+template <> struct DenseMapInfo<remarks::DebugLocation, void> {
+  /// The empty key is a default-constructed location: empty strings and
+  /// line/column 0.
+  static inline remarks::DebugLocation getEmptyKey() {
+    return remarks::DebugLocation();
+  }
+
+  /// The tombstone key keeps the strings empty and is told apart from the
+  /// empty key by its sentinel line/column values. The members are owning
+  /// std::strings, so a fabricated-pointer sentinel (as used for StringRef
+  /// keys) cannot be stored in them; it would only collapse to an empty string.
+  static inline remarks::DebugLocation getTombstoneKey() {
+    auto Loc = remarks::DebugLocation();
+    Loc.SourceColumn = ~0U - 1;
+    Loc.SourceLine = ~0U - 1;
+    return Loc;
+  }
+
+  static unsigned getHashValue(const remarks::DebugLocation &Key) {
+    return hash_combine(Key.SourceFilePath, Key.FunctionName, Key.SourceLine,
+                        Key.SourceColumn);
+  }
+
+  static bool isEqual(const remarks::DebugLocation &LHS,
+                      const remarks::DebugLocation &RHS) {
+    return std::make_tuple(LHS.SourceFilePath, LHS.FunctionName, LHS.SourceLine,
+                           LHS.SourceColumn) ==
+           std::make_tuple(RHS.SourceFilePath, RHS.FunctionName, RHS.SourceLine,
+                           RHS.SourceColumn);
+  }
+};
+
+/// DenseMap key traits for remarks::RemarkInfo, hashing and comparing on the
+/// name, function, pass, type and arguments.
+template <> struct DenseMapInfo<remarks::RemarkInfo, void> {
+  /// The empty key is a default-constructed RemarkInfo.
+  /// NOTE(review): the default constructor is defined outside this header;
+  /// confirm it initialises RemarkType so that empty keys compare equal.
+  static inline remarks::RemarkInfo getEmptyKey() {
+    return remarks::RemarkInfo();
+  }
+
+  /// The tombstone key uses sentinel text for the three names. A fabricated
+  /// `const char *` must not be used here: assigning it to a std::string makes
+  /// the string read through the invalid pointer to find the terminator.
+  static inline remarks::RemarkInfo getTombstoneKey() {
+    auto Info = remarks::RemarkInfo();
+    Info.RemarkName = "<tombstone>";
+    Info.FunctionName = "<tombstone>";
+    Info.PassName = "<tombstone>";
+    Info.RemarkType = remarks::Type::Unknown;
+    return Info;
+  }
+
+  static unsigned getHashValue(const remarks::RemarkInfo &Key) {
+    auto ArgCode = hash_combine_range(Key.Args.begin(), Key.Args.end());
+    return hash_combine(Key.RemarkName, Key.FunctionName, Key.PassName,
+                        remarks::typeToStr(Key.RemarkType), ArgCode);
+  }
+
+  static bool isEqual(const remarks::RemarkInfo &LHS,
+                      const remarks::RemarkInfo &RHS) {
+    return LHS == RHS;
+  }
+};
+} // namespace llvm
\ No newline at end of file
diff --git a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp
index c0357161c4f984..626f7cce862f30 100644
--- a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp
+++ b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp
@@ -13,6 +13,70 @@
 
 namespace llvm {
 namespace remarks {
+/// Take the command line options for filtering remarks based on name, pass
+/// name, type and arguments, using string matching or regular expressions, and
+/// construct a Filters object which can filter the remarks based on the
+/// specified properties. For each property the exact-match option takes
+/// precedence over its regular-expression counterpart when both are given.
+/// \returns the filter, or an error if a regular expression is invalid.
+Expected<Filters> getRemarkFilter(cl::opt<std::string> &RemarkNameOpt,
+                                  cl::opt<std::string> &RemarkNameOptRE,
+                                  cl::opt<std::string> &PassNameOpt,
+                                  cl::opt<std::string> &PassNameOptRE,
+                                  cl::opt<Type> &RemarkTypeOpt,
+                                  cl::opt<std::string> &RemarkFilterArgByOpt,
+                                  cl::opt<std::string> &RemarkArgFilterOptRE) {
+  // Create Filter properties.
+  std::optional<FilterMatcher> RemarkNameFilter;
+  std::optional<FilterMatcher> PassNameFilter;
+  std::optional<FilterMatcher> RemarkArgFilter;
+  std::optional<Type> RemarkType;
+  if (!RemarkNameOpt.empty())
+    RemarkNameFilter = {RemarkNameOpt, false};
+  else if (!RemarkNameOptRE.empty())
+    RemarkNameFilter = {RemarkNameOptRE, true};
+  if (!PassNameOpt.empty())
+    PassNameFilter = {PassNameOpt, false};
+  else if (!PassNameOptRE.empty())
+    PassNameFilter = {PassNameOptRE, true};
+  // Key off whether the option was actually given rather than comparing with
+  // its cl::init default (Type::Failure); otherwise `--remark-type=failure`
+  // would be indistinguishable from "no type filter" and silently ignored.
+  if (RemarkTypeOpt.getNumOccurrences() > 0)
+    RemarkType = RemarkTypeOpt;
+  if (!RemarkFilterArgByOpt.empty())
+    RemarkArgFilter = {RemarkFilterArgByOpt, false};
+  else if (!RemarkArgFilterOptRE.empty())
+    RemarkArgFilter = {RemarkArgFilterOptRE, true};
+  // Create RemarkFilter.
+  return Filters::createRemarkFilter(std::move(RemarkNameFilter),
+                                     std::move(PassNameFilter),
+                                     std::move(RemarkArgFilter), RemarkType);
+}
+
+Error Filters::regexArgumentsValid() {
+  if (RemarkNameFilter && RemarkNameFilter->IsRegex)
+    if (auto E = checkRegex(RemarkNameFilter->FilterRE))
+      return E;
+  if (PassNameFilter && PassNameFilter->IsRegex)
+    if (auto E = checkRegex(PassNameFilter->FilterRE))
+      return E;
+  if (ArgFilter && ArgFilter->IsRegex)
+    if (auto E = checkRegex(ArgFilter->FilterRE))
+      return E;
+  return Error::success();
+}
+
+/// Returns true only if \p Remark passes every configured filter: remark name,
+/// pass name, remark type and argument value. Filters that are not set are
+/// skipped.
+bool Filters::filterRemark(const Remark &Remark) {
+  if (RemarkNameFilter && !RemarkNameFilter->match(Remark.RemarkName))
+    return false;
+  if (PassNameFilter && !PassNameFilter->match(Remark.PassName))
+    return false;
+  // Reject on a type mismatch, but fall through on a match so the argument
+  // filter below is still applied when both filters are given.
+  if (RemarkTypeFilter && *RemarkTypeFilter != Remark.RemarkType)
+    return false;
+  // At least one argument value has to match the argument filter.
+  if (ArgFilter && !any_of(Remark.Args, [this](const Argument &Arg) {
+        return ArgFilter->match(Arg.Val);
+      }))
+    return false;
+  return true;
+}
+
 /// \returns A MemoryBuffer for the input file on success, and an Error
 /// otherwise.
 Expected<std::unique_ptr<MemoryBuffer>>
diff --git a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h
index 5d2335224d4c2f..c071289508db34 100644
--- a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h
+++ b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h
@@ -15,9 +15,11 @@
 #include "llvm/Remarks/RemarkFormat.h"
 #include "llvm/Remarks/RemarkParser.h"
 #include "llvm/Remarks/YAMLRemarkSerializer.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
 #include "llvm/Support/ToolOutputFile.h"
 
 // Keep input + output help + names consistent across the various modes via a
@@ -47,8 +49,118 @@
           "number)"),                                                          \
       cl::init(false), cl::sub(SUBOPT));
 
+#define FILTER_COMMAND_LINE_OPTIONS(SUBOPT)                                    \
+  static cl::opt<std::string> RemarkNameOpt(                                   \
+      "remark-name",                                                           \
+      cl::desc("Optional remark name to filter collection by."),               \
+      cl::ValueOptional, cl::sub(SUBOPT));                                     \
+  static cl::opt<std::string> PassNameOpt(                                     \
+      "pass-name", cl::ValueOptional,                                          \
+      cl::desc("Optional remark pass name to filter collection by."),          \
+      cl::sub(SUBOPT));                                                        \
+  static cl::opt<std::string> RemarkFilterArgByOpt(                            \
+      "filter-arg-by",                                                         \
+      cl::desc("Optional remark arg to filter collection by."),                \
+      cl::ValueOptional, cl::sub(SUBOPT));                                     \
+  static cl::opt<std::string> RemarkNameOptRE(                                 \
+      "rremark-name",                                                          \
+      cl::desc("Optional remark name to filter collection by "                 \
+               "(accepts regular expressions)."),                              \
+      cl::ValueOptional, cl::sub(SUBOPT));                                     \
+  static cl::opt<std::string> RemarkArgFilterOptRE(                            \
+      "rfilter-arg-by",                                                        \
+      cl::desc("Optional remark arg to filter collection by "                  \
+               "(accepts regular expressions)."),                              \
+      cl::sub(SUBOPT), cl::ValueOptional);                                     \
+  static cl::opt<std::string> PassNameOptRE(                                   \
+      "rpass-name", cl::ValueOptional,                                         \
+      cl::desc("Optional remark pass name to filter collection "               \
+               "by (accepts regular expressions)."),                           \
+      cl::sub(SUBOPT));                                                        \
+  static cl::opt<Type> RemarkTypeOpt(                                          \
+      "remark-type",                                                           \
+      cl::desc("Optional remark type to filter collection by."),               \
+      cl::values(clEnumValN(Type::Unknown, "unknown", "UNKOWN"),               \
+                 clEnumValN(Type::Passed, "passed", "PASSED"),                 \
+                 clEnumValN(Type::Missed, "missed", "MISSED"),                 \
+                 clEnumValN(Type::Analysis, "analysis", "ANALYSIS"),           \
+                 clEnumValN(Type::AnalysisFPCommute, "analysis-fp-commute",    \
+                            "ANALYSIS_FP_COMMUTE"),                            \
+                 clEnumValN(Type::AnalysisAliasing, "analysis-aliasing",       \
+                            "ANALYSIS_ALIASING"),                              \
+                 clEnumValN(Type::Failure, "failure", "FAILURE")),             \
+      cl::init(Type::Failure), cl::sub(SUBOPT));
+
 namespace llvm {
 namespace remarks {
+
+/// Filter object which can be either a string or a regex to match with the
+/// remark properties.
+struct FilterMatcher {
+  Regex FilterRE;
+  std::string FilterStr;
+  bool IsRegex;
+  FilterMatcher(std::string Filter, bool IsRegex) : IsRegex(IsRegex) {
+    if (IsRegex)
+      FilterRE = Regex(Filter);
+    else
+      FilterStr = Filter;
+  }
+
+  bool match(StringRef StringToMatch) const {
+    if (IsRegex)
+      return FilterRE.match(StringToMatch);
+    return FilterStr == StringToMatch.trim().str();
+  }
+};
+
+/// Filter out remarks based on remark properties based on name, pass name,
+/// argument and type.
+struct Filters {
+  std::optional<FilterMatcher> RemarkNameFilter;
+  std::optional<FilterMatcher> PassNameFilter;
+  std::optional<FilterMatcher> ArgFilter;
+  std::optional<Type> RemarkTypeFilter;
+  /// Returns a filter object if all the arguments provided are valid regex
+  /// types otherwise return an error.
+  static Expected<Filters>
+  createRemarkFilter(std::optional<FilterMatcher> RemarkNameFilter,
+                     std::optional<FilterMatcher> PassNameFilter,
+                     std::optional<FilterMatcher> ArgFilter,
+                     std::optional<Type> RemarkTypeFilter) {
+    Filters Filter;
+    Filter.RemarkNameFilter = std::move(RemarkNameFilter);
+    Filter.PassNameFilter = std::move(PassNameFilter);
+    Filter.ArgFilter = std::move(ArgFilter);
+    Filter.RemarkTypeFilter = std::move(RemarkTypeFilter);
+    if (auto E = Filter.regexArgumentsValid())
+      return std::move(E);
+    return std::move(Filter);
+  }
+  /// Returns true if \p Remark satisfies all the provided filters.
+  bool filterRemark(const Remark &Remark);
+
+private:
+  /// Check if arguments can be parsed as valid regex types.
+  Error regexArgumentsValid();
+};
+
+/// Convert Regex string error to an error object.
+inline Error checkRegex(const Regex &Regex) {
+  std::string Error;
+  if (!Regex.isValid(Error))
+    return createStringError(make_error_code(std::errc::invalid_argument),
+                             Twine("Regex: ", Error));
+  return Error::success();
+}
+
+Expected<Filters> getRemarkFilter(cl::opt<std::string> &RemarkNameOpt,
+                                  cl::opt<std::string> &RemarkNameOptRE,
+                                  cl::opt<std::string> &PassNameOpt,
+                                  cl::opt<std::string> &PassNameOptRE,
+                                  cl::opt<Type> &RemarkTypeOpt,
+                                  cl::opt<std::string> &RemarkFilterArgByOpt,
+                                  cl::opt<std::string> &RemarkArgFilterOptRE);
 Expected<std::unique_ptr<MemoryBuffer>>
 getInputMemoryBuffer(StringRef InputFileName);
 Expected<std::unique_ptr<ToolOutputFile>>

>From e79089ca1603db14c517a9d6d740aa0365aeee99 Mon Sep 17 00:00:00 2001
From: Zain Jaffal <zain at jjaffal.com>
Date: Wed, 13 Mar 2024 00:55:10 +0000
Subject: [PATCH 2/2] [RemarkUtil][Docs] Add documentation for diff subcommand

Add relevant documentation for the `llvm-remarkutil diff` subcommand
---
 llvm/docs/CommandGuide/llvm-remarkutil.rst | 143 +++++++++++++++++++++
 1 file changed, 143 insertions(+)

diff --git a/llvm/docs/CommandGuide/llvm-remarkutil.rst b/llvm/docs/CommandGuide/llvm-remarkutil.rst
index af7d8eb31c0181..857aa5b394cd69 100644
--- a/llvm/docs/CommandGuide/llvm-remarkutil.rst
+++ b/llvm/docs/CommandGuide/llvm-remarkutil.rst
@@ -21,7 +21,9 @@ Subcommands
   * :ref:`yaml2bitstream_subcommand` - Reserialize YAML remarks to bitstream.
   * :ref:`instruction-count_subcommand` - Output function instruction counts.
   * :ref:`annotation-count_subcommand` - Output remark type count from annotation remarks.
+  * :ref:`count_subcommand` - Output generic remark count.
   * :ref:`size-diff_subcommand` - Compute diff in size remarks.
+  * :ref:`diff_subcommand` - Compute diff between remarks.
 
 .. _bitstream2yaml_subcommand:
 
@@ -425,3 +427,144 @@ EXIT STATUS
 
 :program:`llvm-remarkutil size-diff` returns 0 on success, and a non-zero value
 otherwise.
+
+.. _diff_subcommand:
+
+diff
+~~~~~
+
+.. program:: llvm-remarkutil diff
+
+
+USAGE: :program:`llvm-remarkutil diff` [*options*] <remarka_file>  <remarkb_file>
+
+Summary
+^^^^^^^
+
+:program:`llvm-remarkutil diff` highlights the difference between two versions of `remarks <https://llvm.org/docs/Remarks.html>`_ based on specified properties.
+The tool organises remarks by debug location and highlights the differences between remarks with the same header, i.e. remark name, pass name and function name. By default, the tool highlights the differences in arguments and in remark type between two remarks.
+The tool contains utilities to filter the remark diff based on remark name, pass name, argument value and remark type.
+
+
+Example
+^^^^^^^
+
+``remarks-passed.yaml``
+
+::
+
+  --- !Passed
+  Pass:            Pass 
+  Name:            RemarkName 
+  DebugLoc:        { File: path/to/anno.c, Line: 1, Column: 2 }
+  Function:        func0
+  Args:
+    - String:   '2'
+    - String:          'Info2'
+
+
+``remarks-missed.yaml``
+
+::
+
+  --- !Missed
+  Pass:            Pass 
+  Name:            RemarkName 
+  DebugLoc:        { File: path/to/anno.c, Line: 1, Column: 2 }
+  Function:        func0
+  Args:
+    - String:   '2'
+    - String:          'Info2'
+
+
+Output
+::
+
+  File A: remarks-passed.yaml
+  File B: remarks-missed.yaml
+  ----------
+  path/to/anno.c:func0  Ln: 1 Col: 2
+  --- Has the same header ---
+  Name: RemarkName
+  FunctionName: func0
+  PassName: Pass
+  Only at A >>>>
+  Type: Passed
+  =====
+  Only at B <<<<
+  Type: Missed
+  =====
+
+Notes
+^^^^^
+
+* Duplicate remarks in each remark file are discarded.
+* If a remark doesn't contain a debug location then it won't be taken into account when calculating diffs.
+
+Options
+^^^^^^^
+
+Options are similar to the ones for :ref:`count_subcommand` in terms of filtering remarks based on properties. 
+Additional options include the following. 
+
+.. option:: --only-show-a
+
+  Show remarks that are only in A.
+
+.. option:: --only-show-b
+
+  Show remarks that are only in B.
+
+.. option:: --only-show-common-remarks
+
+  Show intersecting remarks between A and B.
+
+.. option:: --only-show-different-remarks
+
+  Show all the remarks that are unique to A and B.
+
+.. option:: --report_style=<value>
+
+  * ``human`` - Human-readable format
+  * ``json`` - JSON format
+
+.. option:: --show-arg-diff-only
+
+  Show only the remarks that have the same header and differ in arguments.
+
+.. option:: --show-remark-type-diff-only
+
+  Only show diff if remarks have the same header but different type.
+
+JSON format 
+^^^^^^^^^^^
+
+Given the above example the corresponding ``json`` format is
+
+.. code-block:: json
+
+  {
+    "Files": {
+      "A": "remarks-passed.yaml",
+      "B": "remarks-missed.yaml"
+    },
+    "path/to/anno.c": {
+      "func0": {
+        "Ln: 1 Col: 2": {
+          "HasSameHeaderObj": [
+            {
+              "Diff": {
+                "RemarkTypeA": "Passed",
+                "RemarkTypeB": "Missed"
+              },
+              "FunctionName": "func0",
+              "PassName": "Pass",
+              "RemarkName": "RemarkName"
+            }
+          ],
+          "OnlyA": [],
+          "OnlyB": []
+        }
+      }
+    }
+  }
+



More information about the llvm-commits mailing list