[llvm] r359050 - [Remarks] Add string deduplication using a string table
Francis Visoiu Mistrih via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 23 17:06:25 PDT 2019
Author: thegameg
Date: Tue Apr 23 17:06:24 2019
New Revision: 359050
URL: http://llvm.org/viewvc/llvm-project?rev=359050&view=rev
Log:
[Remarks] Add string deduplication using a string table
* Add support for uniquing strings in the remark streamer and emitting the string table in the remarks section.
* Add parsing support for the string table in the RemarkParser.
>From this remark:
```
--- !Missed
Pass: inline
Name: NoDefinition
DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c',
Line: 7, Column: 3 }
Function: printArgsNoRet
Args:
- Callee: printf
- String: ' will not be inlined into '
- Caller: printArgsNoRet
DebugLoc: { File: 'test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c',
Line: 6, Column: 0 }
- String: ' because its definition is unavailable'
...
```
to:
```
--- !Missed
Pass: 0
Name: 1
DebugLoc: { File: 3, Line: 7, Column: 3 }
Function: 2
Args:
- Callee: 4
- String: 5
- Caller: 2
DebugLoc: { File: 3, Line: 6, Column: 0 }
- String: 6
...
```
And the string table in the .remarks/__remarks section containing:
```
inline\0NoDefinition\0printArgsNoRet\0
test-suite/SingleSource/UnitTests/2002-04-17-PrintfChar.c\0printf\0
will not be inlined into \0 because its definition is unavailable\0
```
This is mostly supposed to be used for testing purposes, but it gives us
a 2x reduction in the remark size, and is an incremental change for the
updates to the remarks file format.
Differential Revision: https://reviews.llvm.org/D60227
Added:
llvm/trunk/include/llvm/Remarks/RemarkStringTable.h
llvm/trunk/lib/Remarks/RemarkStringTable.cpp
llvm/trunk/unittests/Remarks/RemarksStrTabParsingTest.cpp
Modified:
llvm/trunk/docs/CodeGenerator.rst
llvm/trunk/include/llvm/IR/RemarkStreamer.h
llvm/trunk/include/llvm/Remarks/RemarkParser.h
llvm/trunk/include/llvm/Support/YAMLTraits.h
llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
llvm/trunk/lib/CodeGen/AsmPrinter/LLVMBuild.txt
llvm/trunk/lib/IR/DiagnosticInfo.cpp
llvm/trunk/lib/IR/LLVMBuild.txt
llvm/trunk/lib/IR/RemarkStreamer.cpp
llvm/trunk/lib/Remarks/CMakeLists.txt
llvm/trunk/lib/Remarks/RemarkParser.cpp
llvm/trunk/lib/Remarks/YAMLRemarkParser.cpp
llvm/trunk/lib/Remarks/YAMLRemarkParser.h
llvm/trunk/test/CodeGen/X86/remarks-section.ll
llvm/trunk/unittests/Remarks/CMakeLists.txt
llvm/trunk/unittests/Remarks/YAMLRemarksParsingTest.cpp
Modified: llvm/trunk/docs/CodeGenerator.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/CodeGenerator.rst?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/docs/CodeGenerator.rst (original)
+++ llvm/trunk/docs/CodeGenerator.rst Tue Apr 23 17:06:24 2019
@@ -1597,6 +1597,10 @@ A section containing metadata on remark
* a magic number: "REMARKS\0"
* the version number: a little-endian uint64_t
+* the string table:
+ * the total size of the string table (the size itself excluded):
+ little-endian uint64_t
+ * a list of null-terminated strings
* the absolute file path to the serialized remark diagnostics: a
null-terminated string.
Modified: llvm/trunk/include/llvm/IR/RemarkStreamer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/RemarkStreamer.h?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/RemarkStreamer.h (original)
+++ llvm/trunk/include/llvm/IR/RemarkStreamer.h Tue Apr 23 17:06:24 2019
@@ -14,10 +14,11 @@
#define LLVM_IR_REMARKSTREAMER_H
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Remarks/RemarkStringTable.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/Regex.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Regex.h"
#include <string>
#include <vector>
@@ -34,6 +35,11 @@ class RemarkStreamer {
/// The YAML streamer.
yaml::Output YAMLOutput;
+ /// The string table containing all the unique strings used in the output.
+ /// The table will be serialized in a section to be consumed after the
+ /// compilation.
+ remarks::StringTable StrTab;
+
public:
RemarkStreamer(StringRef Filename, raw_ostream& OS);
/// Return the filename that the remark diagnostics are emitted to.
@@ -45,6 +51,9 @@ public:
Error setFilter(StringRef Filter);
/// Emit a diagnostic through the streamer.
void emit(const DiagnosticInfoOptimizationBase &Diag);
+ /// The string table used during emission.
+ remarks::StringTable &getStringTable() { return StrTab; }
+ const remarks::StringTable &getStringTable() const { return StrTab; }
};
} // end namespace llvm
Modified: llvm/trunk/include/llvm/Remarks/RemarkParser.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Remarks/RemarkParser.h?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Remarks/RemarkParser.h (original)
+++ llvm/trunk/include/llvm/Remarks/RemarkParser.h Tue Apr 23 17:06:24 2019
@@ -13,6 +13,7 @@
#ifndef LLVM_REMARKS_REMARK_PARSER_H
#define LLVM_REMARKS_REMARK_PARSER_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Remarks/Remark.h"
#include "llvm/Support/Error.h"
@@ -32,6 +33,11 @@ struct Parser {
/// This constructor should be only used for parsing YAML remarks.
Parser(StringRef Buffer);
+ /// Create a parser parsing \p Buffer to Remark objects, using \p StrTabBuf as
+ /// string table.
+ /// This constructor should be only used for parsing YAML remarks.
+ Parser(StringRef Buffer, StringRef StrTabBuf);
+
// Needed because ParserImpl is an incomplete type.
~Parser();
@@ -40,6 +46,18 @@ struct Parser {
Expected<const Remark *> getNext() const;
};
+/// In-memory representation of the string table parsed from a buffer (e.g. the
+/// remarks section).
+struct ParsedStringTable {
+ /// The buffer mapped from the section contents.
+ StringRef Buffer;
+ /// Collection of offsets in the buffer for each string entry.
+ SmallVector<size_t, 8> Offsets;
+
+ Expected<StringRef> operator[](size_t Index);
+ ParsedStringTable(StringRef Buffer);
+};
+
} // end namespace remarks
} // end namespace llvm
Added: llvm/trunk/include/llvm/Remarks/RemarkStringTable.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Remarks/RemarkStringTable.h?rev=359050&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Remarks/RemarkStringTable.h (added)
+++ llvm/trunk/include/llvm/Remarks/RemarkStringTable.h Tue Apr 23 17:06:24 2019
@@ -0,0 +1,59 @@
+//===-- RemarkStringTable.h - Serializing string table ----------*- C++/-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class is used to deduplicate and serialize a string table used for
+// generating remarks.
+//
+// For parsing a string table, use ParsedStringTable in RemarkParser.h
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_STRING_TABLE_H
+#define LLVM_REMARKS_REMARK_STRING_TABLE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace remarks {
+
+/// The string table used for serializing remarks.
+/// This table can be for example serialized in a section to be consumed after
+/// the compilation.
+struct StringTable {
+ /// Allocator holding all the memory used by the map.
+ BumpPtrAllocator Allocator;
+ /// The string table containing all the unique strings used in the output.
+ /// It maps a string to an unique ID.
+ StringMap<unsigned, BumpPtrAllocator &> StrTab;
+ /// Total size of the string table when serialized.
+ size_t SerializedSize = 0;
+
+ StringTable() : Allocator(), StrTab(Allocator) {}
+ /// Add a string to the table. It returns an unique ID of the string.
+ std::pair<unsigned, StringRef> add(StringRef Str);
+ /// Serialize the string table to a stream. It is serialized as a little
+ /// endian uint64 (the size of the table in bytes) followed by a sequence of
+ /// NULL-terminated strings, where the N-th string is the string with the ID N
+ /// in the StrTab map.
+ void serialize(raw_ostream &OS) const;
+ /// Serialize the string table to a vector. This allows users to do the actual
+ /// writing to file/memory/other.
+ /// The string with the ID == N should be the N-th element in the vector.
+ std::vector<StringRef> serialize() const;
+};
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_STRING_TABLE_H */
Modified: llvm/trunk/include/llvm/Support/YAMLTraits.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/YAMLTraits.h?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/YAMLTraits.h (original)
+++ llvm/trunk/include/llvm/Support/YAMLTraits.h Tue Apr 23 17:06:24 2019
@@ -1905,6 +1905,11 @@ struct SequenceTraits<SmallVector<T, N>,
typename std::enable_if<CheckIsBool<
SequenceElementTraits<T>::flow>::value>::type>
: SequenceTraitsImpl<SmallVector<T, N>, SequenceElementTraits<T>::flow> {};
+template <typename T>
+struct SequenceTraits<SmallVectorImpl<T>,
+ typename std::enable_if<CheckIsBool<
+ SequenceElementTraits<T>::flow>::value>::type>
+ : SequenceTraitsImpl<SmallVectorImpl<T>, SequenceElementTraits<T>::flow> {};
// Sequences of fundamental types use flow formatting.
template <typename T>
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Tue Apr 23 17:06:24 2019
@@ -1362,6 +1362,29 @@ void AsmPrinter::emitRemarksSection(Modu
support::endian::write64le(Version.data(), remarks::Version);
OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size()));
+ // Emit the string table in the section.
+ // Note: we need to use the streamer here to emit it in the section. We can't
+ // just use the serialize function with a raw_ostream because of the way
+ // MCStreamers work.
+ const remarks::StringTable &StrTab = RS->getStringTable();
+ std::vector<StringRef> StrTabStrings = StrTab.serialize();
+ uint64_t StrTabSize = StrTab.SerializedSize;
+ // Emit the total size of the string table (the size itself excluded):
+ // little-endian uint64_t.
+ // The total size is located after the version number.
+ std::array<char, 8> StrTabSizeBuf;
+ support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
+ OutStreamer->EmitBinaryData(
+ StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size()));
+ // Emit a list of null-terminated strings.
+ // Note: the order is important here: the ID used in the remarks corresponds
+ // to the position of the string in the section.
+ for (StringRef Str : StrTabStrings) {
+ OutStreamer->EmitBytes(Str);
+ // Explicitly emit a '\0'.
+ OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+ }
+
// Emit the null-terminated absolute path to the remark file.
// The path is located at the offset 0x4 in the section.
StringRef FilenameRef = RS->getFilename();
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/LLVMBuild.txt?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/LLVMBuild.txt (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/LLVMBuild.txt Tue Apr 23 17:06:24 2019
@@ -18,4 +18,4 @@
type = Library
name = AsmPrinter
parent = Libraries
-required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Support Target
+required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoDWARF DebugInfoMSF MC MCParser Remarks Support Target
Modified: llvm/trunk/lib/IR/DiagnosticInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/DiagnosticInfo.cpp?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/IR/DiagnosticInfo.cpp (original)
+++ llvm/trunk/lib/IR/DiagnosticInfo.cpp Tue Apr 23 17:06:24 2019
@@ -43,6 +43,8 @@
using namespace llvm;
+cl::opt<bool> UseStringTable("remarks-yaml-string-table", cl::init(false));
+
int llvm::getNextAvailablePluginDiagnosticKind() {
static std::atomic<int> PluginKindID(DK_FirstPluginKind);
return ++PluginKindID;
@@ -373,6 +375,20 @@ std::string DiagnosticInfoOptimizationBa
void OptimizationRemarkAnalysisFPCommute::anchor() {}
void OptimizationRemarkAnalysisAliasing::anchor() {}
+template <typename T>
+static void mapRemarkHeader(
+ yaml::IO &io, T PassName, T RemarkName, DiagnosticLocation DL,
+ T FunctionName, Optional<uint64_t> Hotness,
+ SmallVectorImpl<DiagnosticInfoOptimizationBase::Argument> &Args) {
+ io.mapRequired("Pass", PassName);
+ io.mapRequired("Name", RemarkName);
+ if (!io.outputting() || DL.isValid())
+ io.mapOptional("DebugLoc", DL);
+ io.mapRequired("Function", FunctionName);
+ io.mapOptional("Hotness", Hotness);
+ io.mapOptional("Args", Args);
+}
+
namespace llvm {
namespace yaml {
@@ -413,13 +429,18 @@ void MappingTraits<DiagnosticInfoOptimiz
GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName());
StringRef PassName(OptDiag->PassName);
- io.mapRequired("Pass", PassName);
- io.mapRequired("Name", OptDiag->RemarkName);
- if (!io.outputting() || DL.isValid())
- io.mapOptional("DebugLoc", DL);
- io.mapRequired("Function", FN);
- io.mapOptional("Hotness", OptDiag->Hotness);
- io.mapOptional("Args", OptDiag->Args);
+ if (UseStringTable) {
+ remarks::StringTable &StrTab =
+ reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+ unsigned PassID = StrTab.add(PassName).first;
+ unsigned NameID = StrTab.add(OptDiag->RemarkName).first;
+ unsigned FunctionID = StrTab.add(FN).first;
+ mapRemarkHeader(io, PassID, NameID, DL, FunctionID, OptDiag->Hotness,
+ OptDiag->Args);
+ } else {
+ mapRemarkHeader(io, PassName, OptDiag->RemarkName, DL, FN, OptDiag->Hotness,
+ OptDiag->Args);
+ }
}
template <> struct MappingTraits<DiagnosticLocation> {
@@ -430,7 +451,15 @@ template <> struct MappingTraits<Diagnos
unsigned Line = DL.getLine();
unsigned Col = DL.getColumn();
- io.mapRequired("File", File);
+ if (UseStringTable) {
+ remarks::StringTable &StrTab =
+ reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+ unsigned FileID = StrTab.add(File).first;
+ io.mapRequired("File", FileID);
+ } else {
+ io.mapRequired("File", File);
+ }
+
io.mapRequired("Line", Line);
io.mapRequired("Column", Col);
}
@@ -459,12 +488,18 @@ template <> struct BlockScalarTraits<Str
template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
assert(io.outputting() && "input not yet implemented");
- // Emit a string block scalar for multiline strings, to preserve newlines.
- if (StringRef(A.Val).count('\n') > 1) {
+
+ if (UseStringTable) {
+ remarks::StringTable &StrTab =
+ reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
+ auto ValueID = StrTab.add(A.Val).first;
+ io.mapRequired(A.Key.data(), ValueID);
+ } else if (StringRef(A.Val).count('\n') > 1) {
StringBlockVal S(A.Val);
io.mapRequired(A.Key.data(), S);
- } else
+ } else {
io.mapRequired(A.Key.data(), A.Val);
+ }
if (A.Loc.isValid())
io.mapOptional("DebugLoc", A.Loc);
}
Modified: llvm/trunk/lib/IR/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/LLVMBuild.txt?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/IR/LLVMBuild.txt (original)
+++ llvm/trunk/lib/IR/LLVMBuild.txt Tue Apr 23 17:06:24 2019
@@ -18,4 +18,4 @@
type = Library
name = Core
parent = Libraries
-required_libraries = BinaryFormat Support
+required_libraries = BinaryFormat Remarks Support
Modified: llvm/trunk/lib/IR/RemarkStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/RemarkStreamer.cpp?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/IR/RemarkStreamer.cpp (original)
+++ llvm/trunk/lib/IR/RemarkStreamer.cpp Tue Apr 23 17:06:24 2019
@@ -17,7 +17,7 @@ using namespace llvm;
RemarkStreamer::RemarkStreamer(StringRef Filename, raw_ostream &OS)
: Filename(Filename), OS(OS),
- YAMLOutput(OS, reinterpret_cast<void *>(this)) {
+ YAMLOutput(OS, reinterpret_cast<void *>(this)), StrTab() {
assert(!Filename.empty() && "This needs to be a real filename.");
}
Modified: llvm/trunk/lib/Remarks/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Remarks/CMakeLists.txt?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/Remarks/CMakeLists.txt (original)
+++ llvm/trunk/lib/Remarks/CMakeLists.txt Tue Apr 23 17:06:24 2019
@@ -1,5 +1,6 @@
add_llvm_library(LLVMRemarks
Remark.cpp
RemarkParser.cpp
+ RemarkStringTable.cpp
YAMLRemarkParser.cpp
)
Modified: llvm/trunk/lib/Remarks/RemarkParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Remarks/RemarkParser.cpp?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/Remarks/RemarkParser.cpp (original)
+++ llvm/trunk/lib/Remarks/RemarkParser.cpp Tue Apr 23 17:06:24 2019
@@ -22,6 +22,9 @@ using namespace llvm::remarks;
Parser::Parser(StringRef Buf) : Impl(llvm::make_unique<YAMLParserImpl>(Buf)) {}
+Parser::Parser(StringRef Buf, StringRef StrTabBuf)
+ : Impl(llvm::make_unique<YAMLParserImpl>(Buf, StrTabBuf)) {}
+
Parser::~Parser() = default;
static Expected<const Remark *> getNextYAML(YAMLParserImpl &Impl) {
@@ -56,6 +59,31 @@ Expected<const Remark *> Parser::getNext
llvm_unreachable("Get next called with an unknown parsing implementation.");
}
+ParsedStringTable::ParsedStringTable(StringRef InBuffer) : Buffer(InBuffer) {
+ while (!InBuffer.empty()) {
+ // Strings are separated by '\0' bytes.
+ std::pair<StringRef, StringRef> Split = InBuffer.split('\0');
+ // We only store the offset from the beginning of the buffer.
+ Offsets.push_back(Split.first.data() - Buffer.data());
+ InBuffer = Split.second;
+ }
+}
+
+Expected<StringRef> ParsedStringTable::operator[](size_t Index) {
+ if (Index >= Offsets.size())
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "String with index %u is out of bounds (size = %u).", Index,
+ Offsets.size());
+
+ size_t Offset = Offsets[Index];
+ // If it's the last offset, we can't use the next offset to know the size of
+ // the string.
+ size_t NextOffset =
+ (Index == Offsets.size() - 1) ? Buffer.size() : Offsets[Index + 1];
+ return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1);
+}
+
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(remarks::Parser, LLVMRemarkParserRef)
Added: llvm/trunk/lib/Remarks/RemarkStringTable.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Remarks/RemarkStringTable.cpp?rev=359050&view=auto
==============================================================================
--- llvm/trunk/lib/Remarks/RemarkStringTable.cpp (added)
+++ llvm/trunk/lib/Remarks/RemarkStringTable.cpp Tue Apr 23 17:06:24 2019
@@ -0,0 +1,48 @@
+//===- RemarkStringTable.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Remark string table used at remark generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+std::pair<unsigned, StringRef> StringTable::add(StringRef Str) {
+ size_t NextID = StrTab.size();
+ auto KV = StrTab.insert({Str, NextID});
+ // If it's a new string, add it to the final size.
+ if (KV.second)
+ SerializedSize += KV.first->first().size() + 1; // +1 for the '\0'
+ // Can be either NextID or the previous ID if the string is already there.
+ return {KV.first->second, KV.first->first()};
+}
+
+void StringTable::serialize(raw_ostream &OS) const {
+ // Emit the number of strings.
+ uint64_t StrTabSize = SerializedSize;
+ support::endian::write(OS, StrTabSize, support::little);
+ // Emit the sequence of strings.
+ for (StringRef Str : serialize()) {
+ OS << Str;
+ // Explicitly emit a '\0'.
+ OS.write('\0');
+ }
+}
+
+std::vector<StringRef> StringTable::serialize() const {
+ std::vector<StringRef> Strings{StrTab.size()};
+ for (const auto &KV : StrTab)
+ Strings[KV.second] = KV.first();
+ return Strings;
+}
Modified: llvm/trunk/lib/Remarks/YAMLRemarkParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Remarks/YAMLRemarkParser.cpp?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/Remarks/YAMLRemarkParser.cpp (original)
+++ llvm/trunk/lib/Remarks/YAMLRemarkParser.cpp Tue Apr 23 17:06:24 2019
@@ -34,7 +34,19 @@ Error YAMLRemarkParser::parseStr(T &Resu
auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
if (!Value)
return make_error<YAMLParseError>("expected a value of scalar type.", Node);
- StringRef Tmp = Value->getRawValue();
+ StringRef Tmp;
+ if (!StrTab) {
+ Tmp = Value->getRawValue();
+ } else {
+ // If we have a string table, parse it as an unsigned.
+ unsigned StrID = 0;
+ if (Error E = parseUnsigned(StrID, Node))
+ return E;
+ if (Expected<StringRef> Str = (*StrTab)[StrID])
+ Tmp = *Str;
+ else
+ return Str.takeError();
+ }
if (Tmp.front() == '\'')
Tmp = Tmp.drop_front();
Modified: llvm/trunk/lib/Remarks/YAMLRemarkParser.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Remarks/YAMLRemarkParser.h?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/lib/Remarks/YAMLRemarkParser.h (original)
+++ llvm/trunk/lib/Remarks/YAMLRemarkParser.h Tue Apr 23 17:06:24 2019
@@ -17,6 +17,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
@@ -38,7 +39,8 @@ struct YAMLRemarkParser {
raw_string_ostream ErrorStream;
/// Temporary parsing buffer for the arguments.
SmallVector<Argument, 8> TmpArgs;
-
+ /// The string table used for parsing strings.
+ Optional<ParsedStringTable> StrTab;
/// The state used by the parser to parse a remark entry. Invalidated with
/// every call to `parseYAMLElement`.
struct ParseState {
@@ -57,10 +59,13 @@ struct YAMLRemarkParser {
/// not be containing any value.
Optional<ParseState> State;
- YAMLRemarkParser(StringRef Buf)
+ YAMLRemarkParser(StringRef Buf, Optional<StringRef> StrTabBuf = None)
: SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString),
- TmpArgs() {
+ TmpArgs(), StrTab() {
SM.setDiagHandler(YAMLRemarkParser::HandleDiagnostic, this);
+
+ if (StrTabBuf)
+ StrTab.emplace(*StrTabBuf);
}
/// Parse a YAML element.
@@ -122,8 +127,8 @@ struct YAMLParserImpl : public ParserImp
/// Set to `true` if we had any errors during parsing.
bool HasErrors = false;
- YAMLParserImpl(StringRef Buf)
- : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf),
+ YAMLParserImpl(StringRef Buf, Optional<StringRef> StrTabBuf = None)
+ : ParserImpl{ParserImpl::Kind::YAML}, YAMLParser(Buf, StrTabBuf),
YAMLIt(YAMLParser.Stream.begin()), HasErrors(false) {}
static bool classof(const ParserImpl *PI) {
Modified: llvm/trunk/test/CodeGen/X86/remarks-section.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/remarks-section.ll?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/remarks-section.ll (original)
+++ llvm/trunk/test/CodeGen/X86/remarks-section.ll Tue Apr 23 17:06:24 2019
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-linux -remarks-section -pass-remarks-output=%/t.yaml | FileCheck -DPATH=%/t.yaml %s
; RUN: llc < %s -mtriple=x86_64-darwin -remarks-section -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN -DPATH=%/t.yaml %s
+; RUN: llc < %s -mtriple=x86_64-darwin -remarks-section -remarks-yaml-string-table -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN-STRTAB -DPATH=%/t.yaml %s
; CHECK-LABEL: func1:
@@ -11,6 +12,11 @@
; The version:
; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table size:
+; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; CHECK-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table:
+; EMPTY
; The remark file path:
; CHECK-NEXT: .ascii "[[PATH]]"
; Null-terminator:
@@ -24,10 +30,50 @@
; The version:
; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table size:
+; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; CHECK-DARWIN-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table:
+; EMPTY
; The remark file path:
; CHECK-DARWIN-NEXT: .ascii "[[PATH]]"
; Null-terminator:
; CHECK-DARWIN-NEXT: .byte 0
+
+; CHECK-DARWIN-STRTAB: .section __LLVM,__remarks,regular,debug
+; The magic number:
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "REMARKS"
+; Null-terminator:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; The version:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The size of the string table:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x71, 0x00, 0x00, 0x00
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0x00, 0x00, 0x00, 0x00
+; The string table:
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "prologepilog"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "StackSize"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "func1"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .byte 48
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii " stack bytes in function"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "asm-printer"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "InstructionCount"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .byte 49
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; CHECK-DARWIN-STRTAB-NEXT: .ascii " instructions in function"
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
+; The remark file path:
+; CHECK-DARWIN-STRTAB-NEXT: .ascii "[[PATH]]"
+; Null-terminator:
+; CHECK-DARWIN-STRTAB-NEXT: .byte 0
define void @func1() {
ret void
}
Modified: llvm/trunk/unittests/Remarks/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Remarks/CMakeLists.txt?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/unittests/Remarks/CMakeLists.txt (original)
+++ llvm/trunk/unittests/Remarks/CMakeLists.txt Tue Apr 23 17:06:24 2019
@@ -4,5 +4,6 @@ set(LLVM_LINK_COMPONENTS
)
add_llvm_unittest(RemarksTests
+ RemarksStrTabParsingTest.cpp
YAMLRemarksParsingTest.cpp
)
Added: llvm/trunk/unittests/Remarks/RemarksStrTabParsingTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Remarks/RemarksStrTabParsingTest.cpp?rev=359050&view=auto
==============================================================================
--- llvm/trunk/unittests/Remarks/RemarksStrTabParsingTest.cpp (added)
+++ llvm/trunk/unittests/Remarks/RemarksStrTabParsingTest.cpp Tue Apr 23 17:06:24 2019
@@ -0,0 +1,39 @@
+//===- unittest/Support/RemarksStrTabParsingTest.cpp - StrTab tests -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+TEST(RemarksStrTab, ParsingEmpty) {
+ StringRef Empty("", 0);
+ remarks::ParsedStringTable StrTab(Empty);
+ Expected<StringRef> Nothing = StrTab[0];
+ EXPECT_FALSE(static_cast<bool>(Nothing));
+ EXPECT_EQ(toString(Nothing.takeError()),
+ "String with index 0 is out of bounds (size = 0).");
+}
+
+TEST(RemarksStrTab, ParsingGood) {
+ StringRef Strings("str1\0str2\0str3\0str4", 20);
+ remarks::ParsedStringTable StrTab(Strings);
+ Expected<StringRef> Result = StrTab[0];
+ EXPECT_TRUE(static_cast<bool>(Result));
+ EXPECT_EQ(*Result, "str1");
+ Result = StrTab[1];
+ EXPECT_TRUE(static_cast<bool>(Result));
+ EXPECT_EQ(*Result, "str2");
+ Result = StrTab[2];
+ EXPECT_TRUE(static_cast<bool>(Result));
+ EXPECT_EQ(*Result, "str3");
+ Result = StrTab[3];
+ EXPECT_TRUE(static_cast<bool>(Result));
+ EXPECT_EQ(*Result, "str4");
+}
Modified: llvm/trunk/unittests/Remarks/YAMLRemarksParsingTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Remarks/YAMLRemarksParsingTest.cpp?rev=359050&r1=359049&r2=359050&view=diff
==============================================================================
--- llvm/trunk/unittests/Remarks/YAMLRemarksParsingTest.cpp (original)
+++ llvm/trunk/unittests/Remarks/YAMLRemarksParsingTest.cpp Tue Apr 23 17:06:24 2019
@@ -492,3 +492,105 @@ TEST(YAMLRemarks, ContentsCAPI) {
EXPECT_FALSE(LLVMRemarkParserHasError(Parser));
LLVMRemarkParserDispose(Parser);
}
+
+TEST(YAMLRemarks, ContentsStrTab) {
+ StringRef Buf = "\n"
+ "--- !Missed\n"
+ "Pass: 0\n"
+ "Name: 1\n"
+ "DebugLoc: { File: 2, Line: 3, Column: 12 }\n"
+ "Function: 3\n"
+ "Hotness: 4\n"
+ "Args:\n"
+ " - Callee: 5\n"
+ " - String: 7\n"
+ " - Caller: 3\n"
+ " DebugLoc: { File: 2, Line: 2, Column: 0 }\n"
+ " - String: 8\n"
+ "\n";
+
+ StringRef StrTabBuf =
+ StringRef("inline\0NoDefinition\0file.c\0foo\0Callee\0bar\0String\0 "
+ "will not be inlined into \0 because its definition is "
+ "unavailable",
+ 115);
+
+ remarks::Parser Parser(Buf, StrTabBuf);
+ Expected<const remarks::Remark *> RemarkOrErr = Parser.getNext();
+ EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
+ EXPECT_TRUE(*RemarkOrErr != nullptr);
+
+ const remarks::Remark &Remark = **RemarkOrErr;
+ EXPECT_EQ(Remark.RemarkType, remarks::Type::Missed);
+ EXPECT_EQ(checkStr(Remark.PassName, 6), "inline");
+ EXPECT_EQ(checkStr(Remark.RemarkName, 12), "NoDefinition");
+ EXPECT_EQ(checkStr(Remark.FunctionName, 3), "foo");
+ EXPECT_TRUE(Remark.Loc);
+ const remarks::RemarkLocation &RL = *Remark.Loc;
+ EXPECT_EQ(checkStr(RL.SourceFilePath, 6), "file.c");
+ EXPECT_EQ(RL.SourceLine, 3U);
+ EXPECT_EQ(RL.SourceColumn, 12U);
+ EXPECT_TRUE(Remark.Hotness);
+ EXPECT_EQ(*Remark.Hotness, 4U);
+ EXPECT_EQ(Remark.Args.size(), 4U);
+
+ unsigned ArgID = 0;
+ for (const remarks::Argument &Arg : Remark.Args) {
+ switch (ArgID) {
+ case 0:
+ EXPECT_EQ(checkStr(Arg.Key, 6), "Callee");
+ EXPECT_EQ(checkStr(Arg.Val, 3), "bar");
+ EXPECT_FALSE(Arg.Loc);
+ break;
+ case 1:
+ EXPECT_EQ(checkStr(Arg.Key, 6), "String");
+ EXPECT_EQ(checkStr(Arg.Val, 26), " will not be inlined into ");
+ EXPECT_FALSE(Arg.Loc);
+ break;
+ case 2: {
+ EXPECT_EQ(checkStr(Arg.Key, 6), "Caller");
+ EXPECT_EQ(checkStr(Arg.Val, 3), "foo");
+ EXPECT_TRUE(Arg.Loc);
+ const remarks::RemarkLocation &RL = *Arg.Loc;
+ EXPECT_EQ(checkStr(RL.SourceFilePath, 6), "file.c");
+ EXPECT_EQ(RL.SourceLine, 2U);
+ EXPECT_EQ(RL.SourceColumn, 0U);
+ break;
+ }
+ case 3:
+ EXPECT_EQ(checkStr(Arg.Key, 6), "String");
+ EXPECT_EQ(checkStr(Arg.Val, 38),
+ " because its definition is unavailable");
+ EXPECT_FALSE(Arg.Loc);
+ break;
+ default:
+ break;
+ }
+ ++ArgID;
+ }
+
+ RemarkOrErr = Parser.getNext();
+ EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
+ EXPECT_EQ(*RemarkOrErr, nullptr);
+}
+
+TEST(YAMLRemarks, ParsingBadStringTableIndex) {
+ StringRef Buf = "\n"
+ "--- !Missed\n"
+ "Pass: 50\n"
+ "\n";
+
+ StringRef StrTabBuf = StringRef("inline");
+
+ remarks::Parser Parser(Buf, StrTabBuf);
+ Expected<const remarks::Remark *> Remark = Parser.getNext();
+ EXPECT_FALSE(Remark); // Expect an error here.
+
+ std::string ErrorStr;
+ raw_string_ostream Stream(ErrorStr);
+ handleAllErrors(Remark.takeError(),
+ [&](const ErrorInfoBase &EIB) { EIB.log(Stream); });
+ EXPECT_TRUE(
+ StringRef(Stream.str())
+ .contains("String with index 50 is out of bounds (size = 1)."));
+}
More information about the llvm-commits
mailing list