[llvm] d4b7c0d - [Remarks] Auto-detect remark parser format (#144554)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 18 12:49:58 PDT 2025


Author: Tobias Stadler
Date: 2025-06-18T20:49:55+01:00
New Revision: d4b7c0d8b437f50ea254d814a1aeecf87a17be91

URL: https://github.com/llvm/llvm-project/commit/d4b7c0d8b437f50ea254d814a1aeecf87a17be91
DIFF: https://github.com/llvm/llvm-project/commit/d4b7c0d8b437f50ea254d814a1aeecf87a17be91.diff

LOG: [Remarks] Auto-detect remark parser format (#144554)

Add remark format 'Auto', which performs automatic detection of the
remark format using the magic numbers at the beginning of the remarks
files.

The RemarkLinker already did something similar, so we streamlined this
and exposed this to llvm-remarkutil.

Added: 
    llvm/test/tools/llvm-remarkutil/Inputs/broken-remark-magic.bitstream
    llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test

Modified: 
    llvm/include/llvm/Remarks/RemarkFormat.h
    llvm/include/llvm/Remarks/RemarkLinker.h
    llvm/lib/Remarks/RemarkFormat.cpp
    llvm/lib/Remarks/RemarkLinker.cpp
    llvm/lib/Remarks/RemarkParser.cpp
    llvm/lib/Remarks/RemarkSerializer.cpp
    llvm/test/tools/llvm-remarkutil/annotation-count.test
    llvm/test/tools/llvm-remarkutil/empty-file.test
    llvm/test/tools/llvm-remarkutil/instruction-count.test
    llvm/test/tools/llvm-remarkutil/instruction-mix.test
    llvm/test/tools/llvm-remarkutil/size-diff/no-difference.test
    llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h
    llvm/unittests/Remarks/RemarksLinkingTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Remarks/RemarkFormat.h b/llvm/include/llvm/Remarks/RemarkFormat.h
index a39a013dcf905..eda201d4ee6f1 100644
--- a/llvm/include/llvm/Remarks/RemarkFormat.h
+++ b/llvm/include/llvm/Remarks/RemarkFormat.h
@@ -23,7 +23,7 @@ namespace remarks {
 constexpr StringLiteral Magic("REMARKS");
 
 /// The format used for serializing/deserializing remarks.
-enum class Format { Unknown, YAML, Bitstream };
+enum class Format { Unknown, Auto, YAML, Bitstream };
 
 /// Parse and validate a string for the remark format.
 LLVM_ABI Expected<Format> parseFormat(StringRef FormatStr);
@@ -31,6 +31,9 @@ LLVM_ABI Expected<Format> parseFormat(StringRef FormatStr);
 /// Parse and validate a magic number to a remark format.
 LLVM_ABI Expected<Format> magicToFormat(StringRef Magic);
 
+/// Detect format based on selected format and magic number
+LLVM_ABI Expected<Format> detectFormat(Format Selected, StringRef Magic);
+
 } // end namespace remarks
 } // end namespace llvm
 

diff  --git a/llvm/include/llvm/Remarks/RemarkLinker.h b/llvm/include/llvm/Remarks/RemarkLinker.h
index 5343c62144708..67208f40592a5 100644
--- a/llvm/include/llvm/Remarks/RemarkLinker.h
+++ b/llvm/include/llvm/Remarks/RemarkLinker.h
@@ -80,13 +80,12 @@ struct RemarkLinker {
   /// \p Buffer.
   /// \p Buffer can be either a standalone remark container or just
   /// metadata. This takes care of uniquing and merging the remarks.
-  LLVM_ABI Error link(StringRef Buffer,
-                      std::optional<Format> RemarkFormat = std::nullopt);
+  LLVM_ABI Error link(StringRef Buffer, Format RemarkFormat = Format::Auto);
 
   /// Link the remarks found in \p Obj by looking for the right section and
   /// calling the method above.
   LLVM_ABI Error link(const object::ObjectFile &Obj,
-                      std::optional<Format> RemarkFormat = std::nullopt);
+                      Format RemarkFormat = Format::Auto);
 
   /// Serialize the linked remarks to the stream \p OS, using the format \p
   /// RemarkFormat.

diff  --git a/llvm/lib/Remarks/RemarkFormat.cpp b/llvm/lib/Remarks/RemarkFormat.cpp
index 800f5bffe70da..1c52e352f9392 100644
--- a/llvm/lib/Remarks/RemarkFormat.cpp
+++ b/llvm/lib/Remarks/RemarkFormat.cpp
@@ -42,6 +42,22 @@ Expected<Format> llvm::remarks::magicToFormat(StringRef MagicStr) {
 
   if (Result == Format::Unknown)
     return createStringError(std::make_error_code(std::errc::invalid_argument),
-                             "Unknown remark magic: '%s'", MagicStr.data());
+                             "Automatic detection of remark format failed. "
+                             "Unknown magic number: '%.4s'",
+                             MagicStr.data());
   return Result;
 }
+
+Expected<Format> llvm::remarks::detectFormat(Format Selected,
+                                             StringRef MagicStr) {
+  if (Selected == Format::Unknown)
+    return createStringError(std::make_error_code(std::errc::invalid_argument),
+                             "Unknown remark parser format.");
+  if (Selected != Format::Auto)
+    return Selected;
+
+  // Empty files are valid bitstream files
+  if (MagicStr.empty())
+    return Format::Bitstream;
+  return magicToFormat(MagicStr);
+}

diff  --git a/llvm/lib/Remarks/RemarkLinker.cpp b/llvm/lib/Remarks/RemarkLinker.cpp
index b8395aa135d82..0ca6217edfddd 100644
--- a/llvm/lib/Remarks/RemarkLinker.cpp
+++ b/llvm/lib/Remarks/RemarkLinker.cpp
@@ -66,17 +66,10 @@ void RemarkLinker::setExternalFilePrependPath(StringRef PrependPathIn) {
   PrependPath = std::string(PrependPathIn);
 }
 
-Error RemarkLinker::link(StringRef Buffer, std::optional<Format> RemarkFormat) {
-  if (!RemarkFormat) {
-    Expected<Format> ParserFormat = magicToFormat(Buffer);
-    if (!ParserFormat)
-      return ParserFormat.takeError();
-    RemarkFormat = *ParserFormat;
-  }
-
+Error RemarkLinker::link(StringRef Buffer, Format RemarkFormat) {
   Expected<std::unique_ptr<RemarkParser>> MaybeParser =
       createRemarkParserFromMeta(
-          *RemarkFormat, Buffer,
+          RemarkFormat, Buffer,
           PrependPath ? std::optional<StringRef>(StringRef(*PrependPath))
                       : std::optional<StringRef>());
   if (!MaybeParser)
@@ -102,8 +95,7 @@ Error RemarkLinker::link(StringRef Buffer, std::optional<Format> RemarkFormat) {
   return Error::success();
 }
 
-Error RemarkLinker::link(const object::ObjectFile &Obj,
-                         std::optional<Format> RemarkFormat) {
+Error RemarkLinker::link(const object::ObjectFile &Obj, Format RemarkFormat) {
   Expected<std::optional<StringRef>> SectionOrErr =
       getRemarksSectionContents(Obj);
   if (!SectionOrErr)

diff  --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp
index 5c1690aaa0fe6..038fc1d3f4856 100644
--- a/llvm/lib/Remarks/RemarkParser.cpp
+++ b/llvm/lib/Remarks/RemarkParser.cpp
@@ -15,6 +15,7 @@
 #include "BitstreamRemarkParser.h"
 #include "YAMLRemarkParser.h"
 #include "llvm-c/Remarks.h"
+#include "llvm/Remarks/RemarkFormat.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include <optional>
 
@@ -50,14 +51,18 @@ Expected<StringRef> ParsedStringTable::operator[](size_t Index) const {
 
 Expected<std::unique_ptr<RemarkParser>>
 llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf) {
-  switch (ParserFormat) {
+  auto DetectedFormat = detectFormat(ParserFormat, Buf);
+  if (!DetectedFormat)
+    return DetectedFormat.takeError();
+
+  switch (*DetectedFormat) {
   case Format::YAML:
     return std::make_unique<YAMLRemarkParser>(Buf);
   case Format::Bitstream:
     return std::make_unique<BitstreamRemarkParser>(Buf);
   case Format::Unknown:
-    return createStringError(std::make_error_code(std::errc::invalid_argument),
-                             "Unknown remark parser format.");
+  case Format::Auto:
+    break;
   }
   llvm_unreachable("unhandled ParseFormat");
 }
@@ -66,15 +71,19 @@ Expected<std::unique_ptr<RemarkParser>>
 llvm::remarks::createRemarkParserFromMeta(
     Format ParserFormat, StringRef Buf,
     std::optional<StringRef> ExternalFilePrependPath) {
-  switch (ParserFormat) {
+  auto DetectedFormat = detectFormat(ParserFormat, Buf);
+  if (!DetectedFormat)
+    return DetectedFormat.takeError();
+
+  switch (*DetectedFormat) {
   case Format::YAML:
     return createYAMLParserFromMeta(Buf, std::move(ExternalFilePrependPath));
   case Format::Bitstream:
     return createBitstreamParserFromMeta(Buf,
                                          std::move(ExternalFilePrependPath));
   case Format::Unknown:
-    return createStringError(std::make_error_code(std::errc::invalid_argument),
-                             "Unknown remark parser format.");
+  case Format::Auto:
+    break;
   }
   llvm_unreachable("unhandled ParseFormat");
 }

diff  --git a/llvm/lib/Remarks/RemarkSerializer.cpp b/llvm/lib/Remarks/RemarkSerializer.cpp
index cc10b91f287af..df1da53d7c8a6 100644
--- a/llvm/lib/Remarks/RemarkSerializer.cpp
+++ b/llvm/lib/Remarks/RemarkSerializer.cpp
@@ -22,8 +22,9 @@ remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode,
                                 raw_ostream &OS) {
   switch (RemarksFormat) {
   case Format::Unknown:
+  case Format::Auto:
     return createStringError(std::errc::invalid_argument,
-                             "Unknown remark serializer format.");
+                             "Invalid remark serializer format.");
   case Format::YAML:
     return std::make_unique<YAMLRemarkSerializer>(OS, Mode);
   case Format::Bitstream:
@@ -37,8 +38,9 @@ remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode,
                                 raw_ostream &OS, remarks::StringTable StrTab) {
   switch (RemarksFormat) {
   case Format::Unknown:
+  case Format::Auto:
     return createStringError(std::errc::invalid_argument,
-                             "Unknown remark serializer format.");
+                             "Invalid remark serializer format.");
   case Format::YAML:
     return std::make_unique<YAMLRemarkSerializer>(OS, Mode, std::move(StrTab));
   case Format::Bitstream:

diff  --git a/llvm/test/tools/llvm-remarkutil/Inputs/broken-remark-magic.bitstream b/llvm/test/tools/llvm-remarkutil/Inputs/broken-remark-magic.bitstream
new file mode 100644
index 0000000000000..97b5955f788bc
--- /dev/null
+++ b/llvm/test/tools/llvm-remarkutil/Inputs/broken-remark-magic.bitstream
@@ -0,0 +1 @@
+12345678

diff  --git a/llvm/test/tools/llvm-remarkutil/annotation-count.test b/llvm/test/tools/llvm-remarkutil/annotation-count.test
index e006220c64f38..ee44ed2035c8b 100644
--- a/llvm/test/tools/llvm-remarkutil/annotation-count.test
+++ b/llvm/test/tools/llvm-remarkutil/annotation-count.test
@@ -1,5 +1,7 @@
 RUN: llvm-remarkutil annotation-count --parser=yaml --annotation-type=remark %p/Inputs/annotation-count.yaml | FileCheck %s
+RUN: llvm-remarkutil annotation-count --annotation-type=remark %p/Inputs/annotation-count.yaml | FileCheck %s
 RUN: llvm-remarkutil yaml2bitstream %p/Inputs/annotation-count.yaml | llvm-remarkutil annotation-count --parser=bitstream --annotation-type=remark | FileCheck %s
+RUN: llvm-remarkutil yaml2bitstream %p/Inputs/annotation-count.yaml | llvm-remarkutil annotation-count --annotation-type=remark | FileCheck %s
 RUN: llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function --remark-name="AnnotationSummary" %p/Inputs/annotation-count.yaml | FileCheck %s --check-prefix=COUNT-CHECK
 RUN: llvm-remarkutil yaml2bitstream %p/Inputs/annotation-count.yaml | llvm-remarkutil count --parser=bitstream --count-by=arg --group-by=function --remark-name="AnnotationSummary" | FileCheck %s --check-prefix=COUNT-CHECK
 

diff  --git a/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test b/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test
new file mode 100644
index 0000000000000..f469eadc07f99
--- /dev/null
+++ b/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test
@@ -0,0 +1,6 @@
+RUN: not llvm-remarkutil instruction-count %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s
+RUN: not llvm-remarkutil instruction-mix %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s
+RUN: not llvm-remarkutil annotation-count --annotation-type=remark %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s
+RUN: not llvm-remarkutil count %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s
+
+CHECK: error: Automatic detection of remark format failed. Unknown magic number: '1234'

diff  --git a/llvm/test/tools/llvm-remarkutil/empty-file.test b/llvm/test/tools/llvm-remarkutil/empty-file.test
index bdc5fcf87f7bf..d9820a088ea8f 100644
--- a/llvm/test/tools/llvm-remarkutil/empty-file.test
+++ b/llvm/test/tools/llvm-remarkutil/empty-file.test
@@ -8,6 +8,11 @@ RUN: llvm-remarkutil instruction-count --parser=bitstream %p/Inputs/empty-file -
 RUN: llvm-remarkutil instruction-mix --parser=bitstream %p/Inputs/empty-file --report_style=csv -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=MIXBITSTREAM
 RUN: llvm-remarkutil annotation-count --parser=bitstream --annotation-type=remark %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=ANNOTATIONBITSTREAM
 RUN: llvm-remarkutil count --parser=bitstream %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=COUNTBITSTREAM
+; Parser format auto-detection should treat empty files as bitstream files
+RUN: llvm-remarkutil instruction-count %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=SIZEBITSTREAM
+RUN: llvm-remarkutil instruction-mix %p/Inputs/empty-file --report_style=csv -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=MIXBITSTREAM
+RUN: llvm-remarkutil annotation-count --annotation-type=remark %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=ANNOTATIONBITSTREAM
+RUN: llvm-remarkutil count %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=COUNTBITSTREAM
 
 ; YAMLPARSER: error: document root is not of mapping type.
 

diff  --git a/llvm/test/tools/llvm-remarkutil/instruction-count.test b/llvm/test/tools/llvm-remarkutil/instruction-count.test
index d94f4f94cc1df..a0aa6dc98c44d 100644
--- a/llvm/test/tools/llvm-remarkutil/instruction-count.test
+++ b/llvm/test/tools/llvm-remarkutil/instruction-count.test
@@ -1,5 +1,7 @@
 RUN: llvm-remarkutil instruction-count --parser=yaml %p/Inputs/instruction-count.yaml | FileCheck %s
+RUN: llvm-remarkutil instruction-count %p/Inputs/instruction-count.yaml | FileCheck %s
 RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-count.yaml | llvm-remarkutil instruction-count --parser=bitstream | FileCheck %s
+RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-count.yaml | llvm-remarkutil instruction-count | FileCheck %s
 RUN: llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function --remark-name="InstructionCount" %p/Inputs/instruction-count.yaml | FileCheck %s --check-prefix=COUNT-CHECK
 RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-count.yaml  | llvm-remarkutil count --parser=bitstream --count-by=arg --group-by=function --remark-name="InstructionCount" | FileCheck %s --check-prefix=COUNT-CHECK
 RUN: not llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function --rremark-name=* %p/Inputs/instruction-count.yaml 2>&1 | FileCheck %s --check-prefix=ERROR-REPOPERATOR -DARG=rremark-name
@@ -18,4 +20,4 @@ RUN: not llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function
 ; COUNT-CHECK: func3,3
 
 ; ERROR-REPOPERATOR: error: invalid argument '--[[ARG]]=*': repetition-operator operand invalid
-; ERROR-BOTHFILTERS: error: conflicting arguments: --remark-name and --rremark-name
\ No newline at end of file
+; ERROR-BOTHFILTERS: error: conflicting arguments: --remark-name and --rremark-name

diff  --git a/llvm/test/tools/llvm-remarkutil/instruction-mix.test b/llvm/test/tools/llvm-remarkutil/instruction-mix.test
index 178c1311b2fea..15994679f5d48 100644
--- a/llvm/test/tools/llvm-remarkutil/instruction-mix.test
+++ b/llvm/test/tools/llvm-remarkutil/instruction-mix.test
@@ -1,5 +1,7 @@
 RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml | FileCheck %s
+RUN: llvm-remarkutil instruction-mix %p/Inputs/instruction-mix.yaml | FileCheck %s
 RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-mix.yaml | llvm-remarkutil instruction-mix --parser=bitstream | FileCheck %s
+RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-mix.yaml | llvm-remarkutil instruction-mix | FileCheck %s
 RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml --report_style=human | FileCheck %s
 RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml --report_style=csv | FileCheck %s --check-prefix=CSV
 RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml --rfilter=meow | FileCheck %s --check-prefix=MEOW-RE
@@ -34,4 +36,4 @@ RUN: not llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix
 ; NONE-EXACT: ----------- -----
 ; NONE-NOT: {{.*}}
 
-; ERROR: error: invalid argument '--rfilter=*': repetition-operator operand invalid
\ No newline at end of file
+; ERROR: error: invalid argument '--rfilter=*': repetition-operator operand invalid

diff  --git a/llvm/test/tools/llvm-remarkutil/size-
diff /no-
diff erence.test b/llvm/test/tools/llvm-remarkutil/size-
diff /no-
diff erence.test
index a9b6ba4ae256a..8550339bebc4d 100644
--- a/llvm/test/tools/llvm-remarkutil/size-
diff /no-
diff erence.test
+++ b/llvm/test/tools/llvm-remarkutil/size-
diff /no-
diff erence.test
@@ -1,4 +1,7 @@
 RUN: llvm-remarkutil size-
diff  %p/Inputs/1-func-1-instr-1-stack.yaml %p/Inputs/1-func-1-instr-1-stack.yaml --parser=yaml | FileCheck -strict-whitespace %s
+RUN: llvm-remarkutil size-
diff  %p/Inputs/1-func-1-instr-1-stack.yaml %p/Inputs/1-func-1-instr-1-stack.yaml | FileCheck -strict-whitespace %s
+RUN: llvm-remarkutil yaml2bitstream -o %t.bitstream %p/Inputs/1-func-1-instr-1-stack.yaml
+RUN: llvm-remarkutil size-
diff  %t.bitstream %p/Inputs/1-func-1-instr-1-stack.yaml | FileCheck -strict-whitespace %s
 
 ; Same file passed twice -> no changes reported.
 

diff  --git a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h
index eb393bc3e3046..894ac8354e18b 100644
--- a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h
+++ b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h
@@ -35,9 +35,12 @@
 // Keep Input format and names consistent accross the modes via a macro.
 #define INPUT_FORMAT_COMMAND_LINE_OPTIONS(SUBOPT)                              \
   static cl::opt<Format> InputFormat(                                          \
-      "parser", cl::desc("Input remark format to parse"),                      \
-      cl::values(clEnumValN(Format::YAML, "yaml", "YAML"),                     \
-                 clEnumValN(Format::Bitstream, "bitstream", "Bitstream")),     \
+      "parser", cl::init(Format::Auto),                                        \
+      cl::desc("Input remark format to parse"),                                \
+      cl::values(                                                              \
+          clEnumValN(Format::Auto, "auto", "Automatic detection (default)"),   \
+          clEnumValN(Format::YAML, "yaml", "YAML"),                            \
+          clEnumValN(Format::Bitstream, "bitstream", "Bitstream")),            \
       cl::sub(SUBOPT));
 
 #define DEBUG_LOC_INFO_COMMAND_LINE_OPTIONS(SUBOPT)                            \

diff  --git a/llvm/unittests/Remarks/RemarksLinkingTest.cpp b/llvm/unittests/Remarks/RemarksLinkingTest.cpp
index dcd598aaeb5cd..89de9e8f4f95d 100644
--- a/llvm/unittests/Remarks/RemarksLinkingTest.cpp
+++ b/llvm/unittests/Remarks/RemarksLinkingTest.cpp
@@ -243,10 +243,8 @@ TEST(Remarks, LinkingError) {
     // Check that the prepend path is propagated and fails with the full path.
     // Also ensures that the remark format is correctly auto-detected.
     RL.setExternalFilePrependPath("/baddir/");
-    Error E = RL.link(
-        StringRef("REMARKS\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0badfile.opt.yaml",
-                  40),
-        /*RemarkFormat=*/std::nullopt);
+    Error E = RL.link(StringRef(
+        "REMARKS\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0badfile.opt.yaml", 40));
     EXPECT_TRUE(static_cast<bool>(E));
     std::string ErrorMessage = toString(std::move(E));
     EXPECT_EQ(StringRef(ErrorMessage).lower(),


        


More information about the llvm-commits mailing list