[llvm] r357749 - Minidump: Add support for reading/writing strings

Pavel Labath via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 5 01:06:27 PDT 2019


Author: labath
Date: Fri Apr  5 01:06:26 2019
New Revision: 357749

URL: http://llvm.org/viewvc/llvm-project?rev=357749&view=rev
Log:
Minidump: Add support for reading/writing strings

Summary:
Strings in minidump files are stored as a 32-bit length field, giving
the length of the string in *bytes*, which is followed by the
appropriate number of UTF16 code units. The string is also supposed to
be null-terminated, and the null-terminator is not a part of the length
field. This patch:
- adds support for reading these strings out of the minidump file (this
  implementation does not depend on proper null-termination)
- adds support for writing them to a minidump file
- uses the previous two pieces to implement proper (de)serialization of
  the CSDVersion field of the SystemInfo stream. Previously, this was
  only read/written as hex, and no attempt was made to access the
  referenced string -- now this string is read and written correctly.

The changes are tested via yaml2obj|obj2yaml round-trip as well as a
unit test which checks the corner cases of the string deserialization
logic.

Reviewers: jhenderson, zturner, clayborg

Subscribers: llvm-commits, aprantl, markmentovai, amccarth, lldb-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59775

Modified:
    llvm/trunk/include/llvm/Object/Minidump.h
    llvm/trunk/include/llvm/ObjectYAML/MinidumpYAML.h
    llvm/trunk/lib/Object/Minidump.cpp
    llvm/trunk/lib/ObjectYAML/MinidumpYAML.cpp
    llvm/trunk/test/tools/obj2yaml/basic-minidump.yaml
    llvm/trunk/unittests/Object/MinidumpTest.cpp
    llvm/trunk/unittests/ObjectYAML/MinidumpYAMLTest.cpp

Modified: llvm/trunk/include/llvm/Object/Minidump.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Object/Minidump.h?rev=357749&r1=357748&r2=357749&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Object/Minidump.h (original)
+++ llvm/trunk/include/llvm/Object/Minidump.h Fri Apr  5 01:06:26 2019
@@ -43,6 +43,10 @@ public:
   /// file does not contain a stream of this type.
   Optional<ArrayRef<uint8_t>> getRawStream(minidump::StreamType Type) const;
 
+  /// Returns the minidump string at the given offset. An error is returned if
+  /// we fail to parse the string, or the string is invalid UTF16.
+  Expected<std::string> getString(size_t Offset) const;
+
   /// Returns the contents of the SystemInfo stream, cast to the appropriate
   /// type. An error is returned if the file does not contain this stream, or
   /// the stream is smaller than the size of the SystemInfo structure. The

Modified: llvm/trunk/include/llvm/ObjectYAML/MinidumpYAML.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ObjectYAML/MinidumpYAML.h?rev=357749&r1=357748&r2=357749&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ObjectYAML/MinidumpYAML.h (original)
+++ llvm/trunk/include/llvm/ObjectYAML/MinidumpYAML.h Fri Apr  5 01:06:26 2019
@@ -67,10 +67,12 @@ struct RawContentStream : public Stream
 /// SystemInfo minidump stream.
 struct SystemInfoStream : public Stream {
   minidump::SystemInfo Info;
+  std::string CSDVersion;
 
-  explicit SystemInfoStream(const minidump::SystemInfo &Info)
+  explicit SystemInfoStream(const minidump::SystemInfo &Info,
+                            std::string CSDVersion)
       : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo),
-        Info(Info) {}
+        Info(Info), CSDVersion(std::move(CSDVersion)) {}
 
   SystemInfoStream()
       : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo) {

Modified: llvm/trunk/lib/Object/Minidump.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/Minidump.cpp?rev=357749&r1=357748&r2=357749&view=diff
==============================================================================
--- llvm/trunk/lib/Object/Minidump.cpp (original)
+++ llvm/trunk/lib/Object/Minidump.cpp Fri Apr  5 01:06:26 2019
@@ -8,6 +8,7 @@
 
 #include "llvm/Object/Minidump.h"
 #include "llvm/Object/Error.h"
+#include "llvm/Support/ConvertUTF.h"
 
 using namespace llvm;
 using namespace llvm::object;
@@ -21,6 +22,33 @@ MinidumpFile::getRawStream(minidump::Str
   return None;
 }
 
+Expected<std::string> MinidumpFile::getString(size_t Offset) const {
+  // A minidump string is a little-endian 32-bit length field giving the size
+  // of the string in *bytes*, followed by that many bytes of UTF16 code units.
+  // Exactly that many bytes are read; the result is converted to UTF8.
+  auto ExpectedSize =
+      getDataSliceAs<support::ulittle32_t>(getData(), Offset, 1);
+  if (!ExpectedSize)
+    return ExpectedSize.takeError();
+  size_t Size = (*ExpectedSize)[0];
+  if (Size % 2 != 0)
+    return createError("String size not even");
+  Size /= 2; // Convert the byte count into a count of UTF16 code units.
+  if (Size == 0)
+    return ""; // Empty string: there is no payload to slice or decode.
+
+  Offset += sizeof(support::ulittle32_t); // Skip past the length field.
+  auto ExpectedData = getDataSliceAs<UTF16>(getData(), Offset, Size);
+  if (!ExpectedData)
+    return ExpectedData.takeError();
+
+  std::string Result;
+  if (!convertUTF16ToUTF8String(*ExpectedData, Result))
+    return createError("String decoding failed");
+
+  return Result;
+}
+
 Expected<ArrayRef<uint8_t>>
 MinidumpFile::getDataSlice(ArrayRef<uint8_t> Data, size_t Offset, size_t Size) {
   // Check for overflow.

Modified: llvm/trunk/lib/ObjectYAML/MinidumpYAML.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ObjectYAML/MinidumpYAML.cpp?rev=357749&r1=357748&r2=357749&view=diff
==============================================================================
--- llvm/trunk/lib/ObjectYAML/MinidumpYAML.cpp (original)
+++ llvm/trunk/lib/ObjectYAML/MinidumpYAML.cpp Fri Apr  5 01:06:26 2019
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ObjectYAML/MinidumpYAML.h"
+#include "llvm/Support/ConvertUTF.h"
 
 using namespace llvm;
 using namespace llvm::MinidumpYAML;
@@ -39,6 +40,8 @@ public:
     return allocateArray(makeArrayRef(Data));
   }
 
+  size_t allocateString(StringRef Str);
+
   void writeTo(raw_ostream &OS) const;
 
 private:
@@ -48,6 +51,26 @@ private:
 };
 } // namespace
 
+size_t BlobAllocator::allocateString(StringRef Str) {
+  SmallVector<UTF16, 32> WStr; // Receives Str converted to UTF16.
+  bool OK = convertUTF8ToUTF16String(Str, WStr);
+  assert(OK && "Invalid UTF8 in Str?");
+  (void)OK; // Avoid an unused-variable warning when asserts are disabled.
+
+  SmallVector<support::ulittle16_t, 32> EndianStr(WStr.size() + 1,
+                                                  support::ulittle16_t());
+  copy(WStr, EndianStr.begin()); // The extra last element is the terminator.
+  return allocateCallback(
+      sizeof(uint32_t) + EndianStr.size() * sizeof(support::ulittle16_t),
+      [EndianStr](raw_ostream &OS) {
+        // Length is in bytes and does not include the null-terminator.
+        support::ulittle32_t Length(2 * (EndianStr.size() - 1));
+        OS.write(reinterpret_cast<const char *>(&Length), sizeof(Length));
+        OS.write(reinterpret_cast<const char *>(EndianStr.begin()),
+                 sizeof(support::ulittle16_t) * EndianStr.size());
+      });
+}
+
 void BlobAllocator::writeTo(raw_ostream &OS) const {
   size_t BeginOffset = OS.tell();
   for (const auto &Callback : Callbacks)
@@ -269,7 +292,7 @@ static void streamMapping(yaml::IO &IO,
   mapOptional(IO, "Minor Version", Info.MinorVersion, 0);
   mapOptional(IO, "Build Number", Info.BuildNumber, 0);
   IO.mapRequired("Platform ID", Info.PlatformId);
-  mapOptionalHex(IO, "CSD Version RVA", Info.CSDVersionRVA, 0);
+  IO.mapOptional("CSD Version", Stream.CSDVersion, "");
   mapOptionalHex(IO, "Suite Mask", Info.SuiteMask, 0);
   mapOptionalHex(IO, "Reserved", Info.Reserved, 0);
   switch (static_cast<ProcessorArchitecture>(Info.ProcessorArch)) {
@@ -337,6 +360,7 @@ static Directory layout(BlobAllocator &F
   Directory Result;
   Result.Type = S.Type;
   Result.Location.RVA = File.tell();
+  Optional<size_t> DataEnd;
   switch (S.Kind) {
   case Stream::StreamKind::RawContent: {
     RawContentStream &Raw = cast<RawContentStream>(S);
@@ -347,14 +371,22 @@ static Directory layout(BlobAllocator &F
     });
     break;
   }
-  case Stream::StreamKind::SystemInfo:
-    File.allocateObject(cast<SystemInfoStream>(S).Info);
+  case Stream::StreamKind::SystemInfo: {
+    SystemInfoStream &SystemInfo = cast<SystemInfoStream>(S);
+    File.allocateObject(SystemInfo.Info);
+    // The CSD string is not a part of the stream.
+    DataEnd = File.tell();
+    SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion);
     break;
+  }
   case Stream::StreamKind::TextContent:
     File.allocateArray(arrayRefFromStringRef(cast<TextContentStream>(S).Text));
     break;
   }
-  Result.Location.DataSize = File.tell() - Result.Location.RVA;
+  // If DataEnd is not set, we assume everything we generated is a part of the
+  // stream.
+  Result.Location.DataSize =
+      DataEnd.getValueOr(File.tell()) - Result.Location.RVA;
   return Result;
 }
 
@@ -395,7 +427,11 @@ Stream::create(const Directory &StreamDe
     auto ExpectedInfo = File.getSystemInfo();
     if (!ExpectedInfo)
       return ExpectedInfo.takeError();
-    return make_unique<SystemInfoStream>(*ExpectedInfo);
+    auto ExpectedCSDVersion = File.getString(ExpectedInfo->CSDVersionRVA);
+    if (!ExpectedCSDVersion)
+      return ExpectedCSDVersion.takeError(); // Propagate getString's error.
+    return make_unique<SystemInfoStream>(*ExpectedInfo,
+                                         std::move(*ExpectedCSDVersion));
   }
   case StreamKind::TextContent:
     return make_unique<TextContentStream>(

Modified: llvm/trunk/test/tools/obj2yaml/basic-minidump.yaml
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/obj2yaml/basic-minidump.yaml?rev=357749&r1=357748&r2=357749&view=diff
==============================================================================
--- llvm/trunk/test/tools/obj2yaml/basic-minidump.yaml (original)
+++ llvm/trunk/test/tools/obj2yaml/basic-minidump.yaml Fri Apr  5 01:06:26 2019
@@ -5,7 +5,7 @@ Streams:
   - Type:            SystemInfo
     Processor Arch:  ARM64
     Platform ID:     Linux
-    CSD Version RVA: 0x01020304
+    CSD Version:     Linux 3.13.0-91-generic
     CPU:             
       CPUID:           0x05060708
   - Type:            LinuxAuxv
@@ -22,7 +22,7 @@ Streams:
 # CHECK-NEXT:   - Type:            SystemInfo
 # CHECK-NEXT:     Processor Arch:  ARM64
 # CHECK-NEXT:     Platform ID:     Linux
-# CHECK-NEXT:     CSD Version RVA: 0x01020304
+# CHECK-NEXT:     CSD Version:     Linux 3.13.0-91-generic
 # CHECK-NEXT:     CPU:             
 # CHECK-NEXT:       CPUID:           0x05060708
 # CHECK-NEXT:   - Type:            LinuxAuxv

Modified: llvm/trunk/unittests/Object/MinidumpTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Object/MinidumpTest.cpp?rev=357749&r1=357748&r2=357749&view=diff
==============================================================================
--- llvm/trunk/unittests/Object/MinidumpTest.cpp (original)
+++ llvm/trunk/unittests/Object/MinidumpTest.cpp Fri Apr  5 01:06:26 2019
@@ -249,3 +249,38 @@ TEST(MinidumpFile, getSystemInfo) {
   EXPECT_EQ(0x08070605u, Info.CPU.X86.FeatureInfo);
   EXPECT_EQ(0x02010009u, Info.CPU.X86.AMDExtendedFeatures);
 }
+
+TEST(MinidumpFile, getString) {
+  std::vector<uint8_t> ManyStrings{
+      // Header
+      'M', 'D', 'M', 'P', 0x93, 0xa7, 0, 0, // Signature, Version
+      2, 0, 0, 0,                           // NumberOfStreams,
+      0x20, 0, 0, 0,                        // StreamDirectoryRVA
+      0, 1, 2, 3, 4, 5, 6, 7,               // Checksum, TimeDateStamp
+      8, 9, 0, 1, 2, 3, 4, 5,               // Flags
+                                            // Stream Directory
+      0, 0, 0, 0, 0, 0, 0, 0,               // Type, DataSize,
+      0x20, 0, 0, 0,                        // RVA
+      1, 0, 0, 0, 0, 0,                     // String1 - odd length
+      0, 0, 1, 0, 0, 0,                     // String2 - too long
+      2, 0, 0, 0, 0, 0xd8,                  // String3 - invalid utf16
+      0, 0, 0, 0, 0, 0,                     // String4 - ""
+      2, 0, 0, 0, 'a', 0,                   // String5 - "a"
+      0,                                    // Mis-align next string
+      2, 0, 0, 0, 'a', 0,                   // String6 - "a"
+
+  };
+  auto ExpectedFile = create(ManyStrings);
+  ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded());
+  const MinidumpFile &File = **ExpectedFile;
+  EXPECT_THAT_EXPECTED(File.getString(44), Failed<BinaryError>()); // String1
+  EXPECT_THAT_EXPECTED(File.getString(50), Failed<BinaryError>()); // String2
+  EXPECT_THAT_EXPECTED(File.getString(56), Failed<BinaryError>()); // String3
+  EXPECT_THAT_EXPECTED(File.getString(62), HasValue(""));          // String4
+  EXPECT_THAT_EXPECTED(File.getString(68), HasValue("a"));         // String5
+  EXPECT_THAT_EXPECTED(File.getString(75), HasValue("a"));         // String6
+
+  // Check the case when the size field does not fit into the remaining data.
+  EXPECT_THAT_EXPECTED(File.getString(ManyStrings.size() - 2),
+                       Failed<BinaryError>());
+}

Modified: llvm/trunk/unittests/ObjectYAML/MinidumpYAMLTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ObjectYAML/MinidumpYAMLTest.cpp?rev=357749&r1=357748&r2=357749&view=diff
==============================================================================
--- llvm/trunk/unittests/ObjectYAML/MinidumpYAMLTest.cpp (original)
+++ llvm/trunk/unittests/ObjectYAML/MinidumpYAMLTest.cpp Fri Apr  5 01:06:26 2019
@@ -33,7 +33,6 @@ Streams:
   - Type:            SystemInfo
     Processor Arch:  ARM64
     Platform ID:     Linux
-    CSD Version RVA: 0x01020304
     CPU:
       CPUID:           0x05060708
   - Type:            LinuxMaps
@@ -54,7 +53,6 @@ Streams:
   const SystemInfo &SysInfo = *ExpectedSysInfo;
   EXPECT_EQ(ProcessorArchitecture::ARM64, SysInfo.ProcessorArch);
   EXPECT_EQ(OSPlatform::Linux, SysInfo.PlatformId);
-  EXPECT_EQ(0x01020304u, SysInfo.CSDVersionRVA);
   EXPECT_EQ(0x05060708u, SysInfo.CPU.Arm.CPUID);
 
   EXPECT_EQ(StreamType::LinuxMaps, File.streams()[1].Type);




More information about the llvm-commits mailing list