[llvm] r359050 - [Remarks] Add string deduplication using a string table

Francis Visoiu Mistrih via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 23 17:06:25 PDT 2019


Author: thegameg
Date: Tue Apr 23 17:06:24 2019
New Revision: 359050

URL: http://llvm.org/viewvc/llvm-project?rev=359050&view=rev
Log:
[Remarks] Add string deduplication using a string table

* Add support for uniquing strings in the remark streamer and emitting the string table in the remarks section.

* Add parsing support for the string table in the RemarkParser.

>From this remark:

```
--- !Missed
Pass:     inline
Name:     NoDefinition
DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c',
            Line: 7, Column: 3 }
Function: printArgsNoRet
Args:
  - Callee:   printf
  - String:   ' will not be inlined into '
  - Caller:   printArgsNoRet
    DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c',
                Line: 6, Column: 0 }
  - String:   ' because its definition is unavailable'
...
```

to:

```
--- !Missed
Pass: 0
Name: 1
DebugLoc: { File: 3, Line: 7, Column: 3 }
Function: 2
Args:
  - Callee:   4
  - String:   5
  - Caller:   2
    DebugLoc: { File: 3, Line: 6, Column: 0 }
  - String:   6
...
```

And the string table in the .remarks/__remarks section containing:

```
inline\0NoDefinition\0printArgsNoRet\0
test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c\0printf\0
will not be inlined into \0 because its definition is unavailable\0
```

This is mostly supposed to be used for testing purposes, but it gives us
a 2x reduction in the remark size, and is an incremental change for the
updates to the remarks file format.

Differential Revision: https://reviews.llvm.org/D60227

Added:
    llvm/trunk/include/llvm/Remarks/RemarkStringTable.h
    llvm/trunk/lib/Remarks/RemarkStringTable.cpp
    llvm/trunk/unittests/Remarks/RemarksStrTabParsingTest.cpp
Modified:
    llvm/trunk/docs/CodeGenerator.rst
    llvm/trunk/include/llvm/IR/RemarkStreamer.h
    llvm/trunk/include/llvm/Remarks/RemarkParser.h
    llvm/trunk/include/llvm/Support/YAMLTraits.h
    llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
    llvm/trunk/lib/CodeGen/AsmPrinter/LLVMBuild.txt
    llvm/trunk/lib/IR/DiagnosticInfo.cpp
    llvm/trunk/lib/IR/LLVMBuild.txt
    llvm/trunk/lib/IR/RemarkStreamer.cpp
    llvm/trunk/lib/Remarks/CMakeLists.txt
    llvm/trunk/lib/Remarks/RemarkParser.cpp
    llvm/trunk/lib/Remarks/YAMLRemarkParser.cpp
    llvm/trunk/lib/Remarks/YAMLRemarkParser.h
    llvm/trunk/test/CodeGen/X86/remarks-section.ll
    llvm/trunk/unittests/Remarks/CMakeLists.txt
    llvm/trunk/unittests/Remarks/YAMLRemarksParsingTest.cpp

Modified: llvm/trunk/docs/CodeGenerator.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/CodeGenerator.rst?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/docs/CodeGenerator.rst (original)
+++ llvm/trunk/docs/CodeGenerator.rst Tue Apr 23 17:06:24 2019
@@ -1597,6 +1597,10 @@ A section containing metadata on remark
 
 * a magic number: "REMARKS\0"
 * the version number: a little-endian uint64_t
+* the string table:
+  * the total size of the string table (the size itself excluded):
+    little-endian uint64_t
+  * a list of null-terminated strings
 * the absolute file path to the serialized remark diagnostics: a
   null-terminated string.
 

Modified: llvm/trunk/include/llvm/IR/RemarkStreamer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/RemarkStreamer.h?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/RemarkStreamer.h (original)
+++ llvm/trunk/include/llvm/IR/RemarkStreamer.h Tue Apr 23 17:06:24 2019
@@ -14,10 +14,11 @@
 #define LLVM_IR_REMARKSTREAMER_H
 
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Remarks/RemarkStringTable.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/Regex.h"
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Regex.h"
 #include <string>
 #include <vector>
 
@@ -34,6 +35,11 @@ class RemarkStreamer {
   /// The YAML streamer.
   yaml::Output YAMLOutput;
 
+  /// The string table containing all the unique strings used in the output.
+  /// The table will be serialized in a section to be consumed after the
+  /// compilation.
+  remarks::StringTable StrTab;
+
 public:
   RemarkStreamer(StringRef Filename, raw_ostream& OS);
   /// Return the filename that the remark diagnostics are emitted to.
@@ -45,6 +51,9 @@ public:
   Error setFilter(StringRef Filter);
   /// Emit a diagnostic through the streamer.
   void emit(const DiagnosticInfoOptimizationBase &Diag);
+  /// The string table used during emission.
+  remarks::StringTable &getStringTable() { return StrTab; }
+  const remarks::StringTable &getStringTable() const { return StrTab; }
 };
 } // end namespace llvm
 

Modified: llvm/trunk/include/llvm/Remarks/RemarkParser.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Remarks/RemarkParser.h?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Remarks/RemarkParser.h (original)
+++ llvm/trunk/include/llvm/Remarks/RemarkParser.h Tue Apr 23 17:06:24 2019
@@ -13,6 +13,7 @@
 #ifndef LLVM_REMARKS_REMARK_PARSER_H
 #define LLVM_REMARKS_REMARK_PARSER_H
 
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Remarks/Remark.h"
 #include "llvm/Support/Error.h"
@@ -32,6 +33,11 @@ struct Parser {
   /// This constructor should be only used for parsing YAML remarks.
   Parser(StringRef Buffer);
 
+  /// Create a parser parsing \p Buffer to Remark objects, using \p StrTabBuf as
+  /// string table.
+  /// This constructor should be only used for parsing YAML remarks.
+  Parser(StringRef Buffer, StringRef StrTabBuf);
+
   // Needed because ParserImpl is an incomplete type.
   ~Parser();
 
@@ -40,6 +46,18 @@ struct Parser {
   Expected<const Remark *> getNext() const;
 };
 
+/// In-memory representation of the string table parsed from a buffer (e.g. the
+/// remarks section).
+struct ParsedStringTable {
+  /// The buffer mapped from the section contents.
+  StringRef Buffer;
+  /// Collection of offsets in the buffer for each string entry.
+  SmallVector<size_t, 8> Offsets;
+
+  Expected<StringRef> operator[](size_t Index);
+  ParsedStringTable(StringRef Buffer);
+};
+
 } // end namespace remarks
 } // end namespace llvm
 

Added: llvm/trunk/include/llvm/Remarks/RemarkStringTable.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Remarks/RemarkStringTable.h?rev=359050&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Remarks/RemarkStringTable.h (added)
+++ llvm/trunk/include/llvm/Remarks/RemarkStringTable.h Tue Apr 23 17:06:24 2019
@@ -0,0 +1,59 @@
+//===-- RemarkStringTable.h - Serializing string table ----------*- C++/-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class is used to deduplicate and serialize a string table used for
+// generating remarks.
+//
+// For parsing a string table, use ParsedStringTable in RemarkParser.h
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_STRING_TABLE_H
+#define LLVM_REMARKS_REMARK_STRING_TABLE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace remarks {
+
+/// The string table used for serializing remarks.
+/// This table can be for example serialized in a section to be consumed after
+/// the compilation.
+struct StringTable {
+  /// Allocator holding all the memory used by the map.
+  BumpPtrAllocator Allocator;
+  /// The string table containing all the unique strings used in the output.
+  /// It maps a string to an unique ID.
+  StringMap<unsigned, BumpPtrAllocator &> StrTab;
+  /// Total size of the string table when serialized.
+  size_t SerializedSize = 0;
+
+  StringTable() : Allocator(), StrTab(Allocator) {}
+  /// Add a string to the table. It returns an unique ID of the string.
+  std::pair<unsigned, StringRef> add(StringRef Str);
+  /// Serialize the string table to a stream. It is serialized as a little
+  /// endian uint64 (the size of the table in bytes) followed by a sequence of
+  /// NULL-terminated strings, where the N-th string is the string with the ID N
+  /// in the StrTab map.
+  void serialize(raw_ostream &OS) const;
+  /// Serialize the string table to a vector. This allows users to do the actual
+  /// writing to file/memory/other.
+  /// The string with the ID == N should be the N-th element in the vector.
+  std::vector<StringRef> serialize() const;
+};
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_STRING_TABLE_H */

Modified: llvm/trunk/include/llvm/Support/YAMLTraits.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/YAMLTraits.h?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/YAMLTraits.h (original)
+++ llvm/trunk/include/llvm/Support/YAMLTraits.h Tue Apr 23 17:06:24 2019
@@ -1905,6 +1905,11 @@ struct SequenceTraits<SmallVector<T, N>,
                       typename std::enable_if<CheckIsBool<
                           SequenceElementTraits<T>::flow>::value>::type>
     : SequenceTraitsImpl<SmallVector<T, N>, SequenceElementTraits<T>::flow> {};
+template <typename T>
+struct SequenceTraits<SmallVectorImpl<T>,
+                      typename std::enable_if<CheckIsBool<
+                          SequenceElementTraits<T>::flow>::value>::type>
+    : SequenceTraitsImpl<SmallVectorImpl<T>, SequenceElementTraits<T>::flow> {};
 
 // Sequences of fundamental types use flow formatting.
 template <typename T>

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Tue Apr 23 17:06:24 2019
@@ -1362,6 +1362,29 @@ void AsmPrinter::emitRemarksSection(Modu
   support::endian::write64le(Version.data(), remarks::Version);
   OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size()));
 
+  // Emit the string table in the section.
+  // Note: we need to use the streamer here to emit it in the section. We can't
+  // just use the serialize function with a raw_ostream because of the way
+  // MCStreamers work.
+  const remarks::StringTable &StrTab = RS->getStringTable();
+  std::vector<StringRef> StrTabStrings = StrTab.serialize();
+  uint64_t StrTabSize = StrTab.SerializedSize;
+  // Emit the total size of the string table (the size itself excluded):
+  // little-endian uint64_t.
+  // The total size is located after the version number.
+  std::array<char, 8> StrTabSizeBuf;
+  support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
+  OutStreamer->EmitBinaryData(
+      StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size()));
+  // Emit a list of null-terminated strings.
+  // Note: the order is important here: the ID used in the remarks corresponds
+  // to the position of the string in the section.
+  for (StringRef Str : StrTabStrings) {
+    OutStreamer->EmitBytes(Str);
+    // Explicitly emit a '\0'.
+    OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+  }
+
   // Emit the null-terminated absolute path to the remark file.
   // The path is located at the offset 0x4 in the section.
   StringRef FilenameRef = RS->getFilename();

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/LLVMBuild.txt?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/LLVMBuild.txt (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/LLVMBuild.txt Tue Apr 23 17:06:24 2019
@@ -18,4 +18,4 @@
 type = Library
 name = AsmPrinter
 parent = Libraries
-required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Support Target
+required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Remarks Support Target

Modified: llvm/trunk/lib/IR/DiagnosticInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/DiagnosticInfo.cpp?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/IR/DiagnosticInfo.cpp (original)
+++ llvm/trunk/lib/IR/DiagnosticInfo.cpp Tue Apr 23 17:06:24 2019
@@ -43,6 +43,8 @@
 
 using namespace llvm;
 
+cl::opt<bool> UseStringTable("remarks-yaml-string-table", cl::init(false));
+
 int llvm::getNextAvailablePluginDiagnosticKind() {
   static std::atomic<int> PluginKindID(DK_FirstPluginKind);
   return ++PluginKindID;
@@ -373,6 +375,20 @@ std::string DiagnosticInfoOptimizationBa
 void OptimizationRemarkAnalysisFPCommute::anchor() {}
 void OptimizationRemarkAnalysisAliasing::anchor() {}
 
+template <typename T>
+static void mapRemarkHeader(
+    yaml::IO &io, T PassName, T RemarkName, DiagnosticLocation DL,
+    T FunctionName, Optional<uint64_t> Hotness,
+    SmallVectorImpl<DiagnosticInfoOptimizationBase::Argument> &Args) {
+  io.mapRequired("Pass", PassName);
+  io.mapRequired("Name", RemarkName);
+  if (!io.outputting() || DL.isValid())
+    io.mapOptional("DebugLoc", DL);
+  io.mapRequired("Function", FunctionName);
+  io.mapOptional("Hotness", Hotness);
+  io.mapOptional("Args", Args);
+}
+
 namespace llvm {
 namespace yaml {
 
@@ -413,13 +429,18 @@ void MappingTraits<DiagnosticInfoOptimiz
       GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName());
 
   StringRef PassName(OptDiag->PassName);
-  io.mapRequired("Pass", PassName);
-  io.mapRequired("Name", OptDiag->RemarkName);
-  if (!io.outputting() || DL.isValid())
-    io.mapOptional("DebugLoc", DL);
-  io.mapRequired("Function", FN);
-  io.mapOptional("Hotness", OptDiag->Hotness);
-  io.mapOptional("Args", OptDiag->Args);
+  if (UseStringTable) {
+    remarks::StringTable &StrTab =
+        reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+    unsigned PassID = StrTab.add(PassName).first;
+    unsigned NameID = StrTab.add(OptDiag->RemarkName).first;
+    unsigned FunctionID = StrTab.add(FN).first;
+    mapRemarkHeader(io, PassID, NameID, DL, FunctionID, OptDiag->Hotness,
+                    OptDiag->Args);
+  } else {
+    mapRemarkHeader(io, PassName, OptDiag->RemarkName, DL, FN, OptDiag->Hotness,
+                    OptDiag->Args);
+  }
 }
 
 template <> struct MappingTraits<DiagnosticLocation> {
@@ -430,7 +451,15 @@ template <> struct MappingTraits<Diagnos
     unsigned Line = DL.getLine();
     unsigned Col = DL.getColumn();
 
-    io.mapRequired("File", File);
+    if (UseStringTable) {
+      remarks::StringTable &StrTab =
+          reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+      unsigned FileID = StrTab.add(File).first;
+      io.mapRequired("File", FileID);
+    } else {
+      io.mapRequired("File", File);
+    }
+
     io.mapRequired("Line", Line);
     io.mapRequired("Column", Col);
   }
@@ -459,12 +488,18 @@ template <> struct BlockScalarTraits<Str
 template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
   static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
     assert(io.outputting() && "input not yet implemented");
-    // Emit a string block scalar for multiline strings, to preserve newlines.
-    if (StringRef(A.Val).count('\n') > 1) {
+
+    if (UseStringTable) {
+      remarks::StringTable &StrTab =
+          reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+      auto ValueID = StrTab.add(A.Val).first;
+      io.mapRequired(A.Key.data(), ValueID);
+    } else if (StringRef(A.Val).count('\n') > 1) {
       StringBlockVal S(A.Val);
       io.mapRequired(A.Key.data(), S);
-    } else
+    } else {
       io.mapRequired(A.Key.data(), A.Val);
+    }
     if (A.Loc.isValid())
       io.mapOptional("DebugLoc", A.Loc);
   }

Modified: llvm/trunk/lib/IR/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/LLVMBuild.txt?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/IR/LLVMBuild.txt (original)
+++ llvm/trunk/lib/IR/LLVMBuild.txt Tue Apr 23 17:06:24 2019
@@ -18,4 +18,4 @@
 type = Library
 name = Core
 parent = Libraries
-required_libraries = BinaryFormat Support
+required_libraries = BinaryFormat Remarks Support

Modified: llvm/trunk/lib/IR/RemarkStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/RemarkStreamer.cpp?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/IR/RemarkStreamer.cpp (original)
+++ llvm/trunk/lib/IR/RemarkStreamer.cpp Tue Apr 23 17:06:24 2019
@@ -17,7 +17,7 @@ using namespace llvm;
 
 RemarkStreamer::RemarkStreamer(StringRef Filename, raw_ostream &OS)
     : Filename(Filename), OS(OS),
-      YAMLOutput(OS, reinterpret_cast<void *>(this)) {
+      YAMLOutput(OS, reinterpret_cast<void *>(this)), StrTab() {
   assert(!Filename.empty() && "This needs to be a real filename.");
 }
 

Modified: llvm/trunk/lib/Remarks/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Remarks/CMakeLists.txt?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/Remarks/CMakeLists.txt (original)
+++ llvm/trunk/lib/Remarks/CMakeLists.txt Tue Apr 23 17:06:24 2019
@@ -1,5 +1,6 @@
 add_llvm_library(LLVMRemarks
   Remark.cpp
   RemarkParser.cpp
+  RemarkStringTable.cpp
   YAMLRemarkParser.cpp
 )

Modified: llvm/trunk/lib/Remarks/RemarkParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Remarks/RemarkParser.cpp?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/Remarks/RemarkParser.cpp (original)
+++ llvm/trunk/lib/Remarks/RemarkParser.cpp Tue Apr 23 17:06:24 2019
@@ -22,6 +22,9 @@ using namespace llvm::remarks;
 
 Parser::Parser(StringRef Buf) : Impl(llvm::make_unique<YAMLParserImpl>(Buf)) {}
 
+Parser::Parser(StringRef Buf, StringRef StrTabBuf)
+    : Impl(llvm::make_unique<YAMLParserImpl>(Buf, StrTabBuf)) {}
+
 Parser::~Parser() = default;
 
 static Expected<const Remark *> getNextYAML(YAMLParserImpl &Impl) {
@@ -56,6 +59,31 @@ Expected<const Remark *> Parser::getNext
   llvm_unreachable("Get next called with an unknown parsing implementation.");
 }
 
+ParsedStringTable::ParsedStringTable(StringRef InBuffer) : Buffer(InBuffer) {
+  while (!InBuffer.empty()) {
+    // Strings are separated by '\0' bytes.
+    std::pair<StringRef, StringRef> Split = InBuffer.split('\0');
+    // We only store the offset from the beginning of the buffer.
+    Offsets.push_back(Split.first.data() - Buffer.data());
+    InBuffer = Split.second;
+  }
+}
+
+Expected<StringRef> ParsedStringTable::operator[](size_t Index) {
+  if (Index >= Offsets.size())
+    return createStringError(
+        std::make_error_code(std::errc::invalid_argument),
+        "String with index %u is out of bounds (size = %u).", Index,
+        Offsets.size());
+
+  size_t Offset = Offsets[Index];
+  // If it's the last offset, we can't use the next offset to know the size of
+  // the string.
+  size_t NextOffset =
+      (Index == Offsets.size() - 1) ? Buffer.size() : Offsets[Index + 1];
+  return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1);
+}
+
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(remarks::Parser, LLVMRemarkParserRef)
 

Added: llvm/trunk/lib/Remarks/RemarkStringTable.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Remarks/RemarkStringTable.cpp?rev=359050&view=auto
==============================================================================
--- llvm/trunk/lib/Remarks/RemarkStringTable.cpp (added)
+++ llvm/trunk/lib/Remarks/RemarkStringTable.cpp Tue Apr 23 17:06:24 2019
@@ -0,0 +1,48 @@
+//===- RemarkStringTable.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Remark string table used at remark generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+std::pair<unsigned, StringRef> StringTable::add(StringRef Str) {
+  size_t NextID = StrTab.size();
+  auto KV = StrTab.insert({Str, NextID});
+  // If it's a new string, add it to the final size.
+  if (KV.second)
+    SerializedSize += KV.first->first().size() + 1; // +1 for the '\0'
+  // Can be either NextID or the previous ID if the string is already there.
+  return {KV.first->second, KV.first->first()};
+}
+
+void StringTable::serialize(raw_ostream &OS) const {
+  // Emit the number of strings.
+  uint64_t StrTabSize = SerializedSize;
+  support::endian::write(OS, StrTabSize, support::little);
+  // Emit the sequence of strings.
+  for (StringRef Str : serialize()) {
+    OS << Str;
+    // Explicitly emit a '\0'.
+    OS.write('\0');
+  }
+}
+
+std::vector<StringRef> StringTable::serialize() const {
+  std::vector<StringRef> Strings{StrTab.size()};
+  for (const auto &KV : StrTab)
+    Strings[KV.second] = KV.first();
+  return Strings;
+}

Modified: llvm/trunk/lib/Remarks/YAMLRemarkParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Remarks/YAMLRemarkParser.cpp?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/Remarks/YAMLRemarkParser.cpp (original)
+++ llvm/trunk/lib/Remarks/YAMLRemarkParser.cpp Tue Apr 23 17:06:24 2019
@@ -34,7 +34,19 @@ Error YAMLRemarkParser::parseStr(T &Resu
   auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
   if (!Value)
     return make_error<YAMLParseError>("expected a value of scalar type.", Node);
-  StringRef Tmp = Value->getRawValue();
+  StringRef Tmp;
+  if (!StrTab) {
+    Tmp = Value->getRawValue();
+  } else {
+    // If we have a string table, parse it as an unsigned.
+    unsigned StrID = 0;
+    if (Error E = parseUnsigned(StrID, Node))
+      return E;
+    if (Expected<StringRef> Str = (*StrTab)[StrID])
+      Tmp = *Str;
+    else
+      return Str.takeError();
+  }
 
   if (Tmp.front() == '\'')
     Tmp = Tmp.drop_front();

Modified: llvm/trunk/lib/Remarks/YAMLRemarkParser.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Remarks/YAMLRemarkParser.h?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/Remarks/YAMLRemarkParser.h (original)
+++ llvm/trunk/lib/Remarks/YAMLRemarkParser.h Tue Apr 23 17:06:24 2019
@@ -17,6 +17,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/YAMLParser.h"
@@ -38,7 +39,8 @@ struct YAMLRemarkParser {
   raw_string_ostream ErrorStream;
   /// Temporary parsing buffer for the arguments.
   SmallVector<Argument, 8> TmpArgs;
-
+  /// The string table used for parsing strings.
+  Optional<ParsedStringTable> StrTab;
   /// The state used by the parser to parse a remark entry. Invalidated with
   /// every call to `parseYAMLElement`.
   struct ParseState {
@@ -57,10 +59,13 @@ struct YAMLRemarkParser {
   /// not be containing any value.
   Optional<ParseState> State;
 
-  YAMLRemarkParser(StringRef Buf)
+  YAMLRemarkParser(StringRef Buf, Optional<StringRef> StrTabBuf = None)
       : SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString),
-        TmpArgs() {
+        TmpArgs(), StrTab() {
     SM.setDiagHandler(YAMLRemarkParser::HandleDiagnostic, this);
+
+    if (StrTabBuf)
+      StrTab.emplace(*StrTabBuf);
   }
 
   /// Parse a YAML element.
@@ -122,8 +127,8 @@ struct YAMLParserImpl : public ParserImp
   /// Set to `true` if we had any errors during parsing.
   bool HasErrors = false;
 
-  YAMLParserImpl(StringRef Buf)
-      : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf),
+  YAMLParserImpl(StringRef Buf, Optional<StringRef> StrTabBuf = None)
+      : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf, StrTabBuf),
         YAMLIt(YAMLParser.Stream.begin()), HasErrors(false) {}
 
   static bool classof(const ParserImpl *PI) {

Modified: llvm/trunk/test/CodeGen/X86/remarks-section.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/remarks-section.ll?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/remarks-section.ll (original)
+++ llvm/trunk/test/CodeGen/X86/remarks-section.ll Tue Apr 23 17:06:24 2019
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-linux -remarks-section -pass-remarks-output=%/t.yaml | FileCheck -DPATH=%/t.yaml %s
 ; RUN: llc < %s -mtriple=x86_64-darwin -remarks-section -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN -DPATH=%/t.yaml %s
+; RUN: llc < %s -mtriple=x86_64-darwin -remarks-section -remarks-yaml-string-table -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN-STRTAB -DPATH=%/t.yaml %s
 
 ; CHECK-LABEL: func1:
 
@@ -11,6 +12,11 @@
 ; The version:
 ; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
 ; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table size:
+; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table:
+; EMPTY
 ; The remark file path:
 ; CHECK-NEXT: .ascii "[[PATH]]"
 ; Null-terminator:
@@ -24,10 +30,50 @@
 ; The version:
 ; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
 ; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table size:
+; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table:
+; EMPTY
 ; The remark file path:
 ; CHECK-DARWIN-NEXT: .ascii "[[PATH]]"
 ; Null-terminator:
 ; CHECK-DARWIN-NEXT: .byte 0
+
+; CHECK-DARWIN-STRTAB: .section __LLVM,__remarks,regular,debug
+; The magic number:
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "REMARKS"
+; Null-terminator:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; The version:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The size of the string table:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x71, 0x00, 0x00, 0x00
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table:
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "prologepilog"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "StackSize"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "func1"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .byte 48
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii " stack bytes in function"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "asm-printer"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "InstructionCount"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .byte 49
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii " instructions in function"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; The remark file path:
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "[[PATH]]"
+; Null-terminator:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
 define void @func1() {
   ret void
 }

Modified: llvm/trunk/unittests/Remarks/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Remarks/CMakeLists.txt?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/unittests/Remarks/CMakeLists.txt (original)
+++ llvm/trunk/unittests/Remarks/CMakeLists.txt Tue Apr 23 17:06:24 2019
@@ -4,5 +4,6 @@ set(LLVM_LINK_COMPONENTS
   )
 
 add_llvm_unittest(RemarksTests
+  RemarksStrTabParsingTest.cpp
   YAMLRemarksParsingTest.cpp
   )

Added: llvm/trunk/unittests/Remarks/RemarksStrTabParsingTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Remarks/RemarksStrTabParsingTest.cpp?rev=359050&view=auto
==============================================================================
--- llvm/trunk/unittests/Remarks/RemarksStrTabParsingTest.cpp (added)
+++ llvm/trunk/unittests/Remarks/RemarksStrTabParsingTest.cpp Tue Apr 23 17:06:24 2019
@@ -0,0 +1,39 @@
+//===- unittest/Support/RemarksStrTabParsingTest.cpp - StrTab tests -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+TEST(RemarksStrTab, ParsingEmpty) {
+  StringRef Empty("", 0);
+  remarks::ParsedStringTable StrTab(Empty);
+  Expected<StringRef> Nothing = StrTab[0];
+  EXPECT_FALSE(static_cast<bool>(Nothing));
+  EXPECT_EQ(toString(Nothing.takeError()),
+            "String with index 0 is out of bounds (size = 0).");
+}
+
+TEST(RemarksStrTab, ParsingGood) {
+  StringRef Strings("str1\0str2\0str3\0str4", 20);
+  remarks::ParsedStringTable StrTab(Strings);
+  Expected<StringRef> Result = StrTab[0];
+  EXPECT_TRUE(static_cast<bool>(Result));
+  EXPECT_EQ(*Result, "str1");
+  Result = StrTab[1];
+  EXPECT_TRUE(static_cast<bool>(Result));
+  EXPECT_EQ(*Result, "str2");
+  Result = StrTab[2];
+  EXPECT_TRUE(static_cast<bool>(Result));
+  EXPECT_EQ(*Result, "str3");
+  Result = StrTab[3];
+  EXPECT_TRUE(static_cast<bool>(Result));
+  EXPECT_EQ(*Result, "str4");
+}

Modified: llvm/trunk/unittests/Remarks/YAMLRemarksParsingTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Remarks/YAMLRemarksParsingTest.cpp?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/unittests/Remarks/YAMLRemarksParsingTest.cpp (original)
+++ llvm/trunk/unittests/Remarks/YAMLRemarksParsingTest.cpp Tue Apr 23 17:06:24 2019
@@ -492,3 +492,105 @@ TEST(YAMLRemarks, ContentsCAPI) {
   EXPECT_FALSE(LLVMRemarkParserHasError(Parser));
   LLVMRemarkParserDispose(Parser);
 }
+
+TEST(YAMLRemarks, ContentsStrTab) {
+  StringRef Buf = "\n"
+                  "--- !Missed\n"
+                  "Pass: 0\n"
+                  "Name: 1\n"
+                  "DebugLoc: { File: 2, Line: 3, Column: 12 }\n"
+                  "Function: 3\n"
+                  "Hotness: 4\n"
+                  "Args:\n"
+                  "  - Callee: 5\n"
+                  "  - String: 7\n"
+                  "  - Caller: 3\n"
+                  "    DebugLoc: { File: 2, Line: 2, Column: 0 }\n"
+                  "  - String: 8\n"
+                  "\n";
+
+  StringRef StrTabBuf =
+      StringRef("inline\0NoDefinition\0file.c\0foo\0Callee\0bar\0String\0 "
+                "will not be inlined into \0 because its definition is "
+                "unavailable",
+                115);
+
+  remarks::Parser Parser(Buf, StrTabBuf);
+  Expected<const remarks::Remark *> RemarkOrErr = Parser.getNext();
+  EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
+  EXPECT_TRUE(*RemarkOrErr != nullptr);
+
+  const remarks::Remark &Remark = **RemarkOrErr;
+  EXPECT_EQ(Remark.RemarkType, remarks::Type::Missed);
+  EXPECT_EQ(checkStr(Remark.PassName, 6), "inline");
+  EXPECT_EQ(checkStr(Remark.RemarkName, 12), "NoDefinition");
+  EXPECT_EQ(checkStr(Remark.FunctionName, 3), "foo");
+  EXPECT_TRUE(Remark.Loc);
+  const remarks::RemarkLocation &RL = *Remark.Loc;
+  EXPECT_EQ(checkStr(RL.SourceFilePath, 6), "file.c");
+  EXPECT_EQ(RL.SourceLine, 3U);
+  EXPECT_EQ(RL.SourceColumn, 12U);
+  EXPECT_TRUE(Remark.Hotness);
+  EXPECT_EQ(*Remark.Hotness, 4U);
+  EXPECT_EQ(Remark.Args.size(), 4U);
+
+  unsigned ArgID = 0;
+  for (const remarks::Argument &Arg : Remark.Args) {
+    switch (ArgID) {
+    case 0:
+      EXPECT_EQ(checkStr(Arg.Key, 6), "Callee");
+      EXPECT_EQ(checkStr(Arg.Val, 3), "bar");
+      EXPECT_FALSE(Arg.Loc);
+      break;
+    case 1:
+      EXPECT_EQ(checkStr(Arg.Key, 6), "String");
+      EXPECT_EQ(checkStr(Arg.Val, 26), " will not be inlined into ");
+      EXPECT_FALSE(Arg.Loc);
+      break;
+    case 2: {
+      EXPECT_EQ(checkStr(Arg.Key, 6), "Caller");
+      EXPECT_EQ(checkStr(Arg.Val, 3), "foo");
+      EXPECT_TRUE(Arg.Loc);
+      const remarks::RemarkLocation &RL = *Arg.Loc;
+      EXPECT_EQ(checkStr(RL.SourceFilePath, 6), "file.c");
+      EXPECT_EQ(RL.SourceLine, 2U);
+      EXPECT_EQ(RL.SourceColumn, 0U);
+      break;
+    }
+    case 3:
+      EXPECT_EQ(checkStr(Arg.Key, 6), "String");
+      EXPECT_EQ(checkStr(Arg.Val, 38),
+                " because its definition is unavailable");
+      EXPECT_FALSE(Arg.Loc);
+      break;
+    default:
+      break;
+    }
+    ++ArgID;
+  }
+
+  RemarkOrErr = Parser.getNext();
+  EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
+  EXPECT_EQ(*RemarkOrErr, nullptr);
+}
+
+TEST(YAMLRemarks, ParsingBadStringTableIndex) {
+  StringRef Buf = "\n"
+                  "--- !Missed\n"
+                  "Pass: 50\n"
+                  "\n";
+
+  StringRef StrTabBuf = StringRef("inline");
+
+  remarks::Parser Parser(Buf, StrTabBuf);
+  Expected<const remarks::Remark *> Remark = Parser.getNext();
+  EXPECT_FALSE(Remark); // Expect an error here.
+
+  std::string ErrorStr;
+  raw_string_ostream Stream(ErrorStr);
+  handleAllErrors(Remark.takeError(),
+                  [&](const ErrorInfoBase &EIB) { EIB.log(Stream); });
+  EXPECT_TRUE(
+      StringRef(Stream.str())
+          .contains("String with index 50 is out of bounds (size = 1)."));
+}




More information about the llvm-commits mailing list