[Lldb-commits] [lldb] r357691 - Breakpad: Refine record classification code

Thu Apr 4 06:23:25 PDT 2019

Author: labath
Date: Thu Apr  4 06:23:25 2019
New Revision: 357691

URL: http://llvm.org/viewvc/llvm-project?rev=357691&view=rev
Log:
Breakpad: Refine record classification code

Previously we would classify all STACK records into a single bucket.
This is not really helpful, because there are three distinct types of
records beginning with the token "STACK" (STACK CFI INIT, STACK CFI,
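STACK WIN). To be consistent with how we're treating other records, we
should classify these as three different record types. This patch adds
the two CFI kinds (STACK CFI INIT and STACK CFI); any other line that
starts with STACK, including STACK WIN, is classified as invalid
(llvm::None).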

This patch also implements the logic to put "STACK CFI INIT" and "STACK
CFI" records into the same "section" of the breakpad file, as they are
meant to be read together (similar to how FUNC and LINE records are
treated).

The code which performs actual parsing of these records will come in a
separate patch.

Modified:
    lldb/trunk/lit/Modules/Breakpad/Inputs/discontiguous-sections.syms
    lldb/trunk/lit/Modules/Breakpad/discontiguous-sections.test
    lldb/trunk/lit/Modules/Breakpad/sections.test
    lldb/trunk/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp
    lldb/trunk/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h
    lldb/trunk/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp
    lldb/trunk/unittests/ObjectFile/Breakpad/BreakpadRecordsTest.cpp

Modified: lldb/trunk/lit/Modules/Breakpad/Inputs/discontiguous-sections.syms
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/lit/Modules/Breakpad/Inputs/discontiguous-sections.syms?rev=357691&r1=357690&r2=357691&view=diff
==============================================================================

--- lldb/trunk/lit/Modules/Breakpad/Inputs/discontiguous-sections.syms (original)
+++ lldb/trunk/lit/Modules/Breakpad/Inputs/discontiguous-sections.syms Thu Apr  4 06:23:25 2019
@@ -3,3 +3,5 @@ INFO CODE_ID 00000000B52499D1F0F766FFFFF
 FILE 0 /tmp/a.c
 PUBLIC 1010 0 _start
 FILE 1 /tmp/b.c
+STACK bogus
+FILE 2 /tmp/c.c

Modified: lldb/trunk/lit/Modules/Breakpad/discontiguous-sections.test
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/lit/Modules/Breakpad/discontiguous-sections.test?rev=357691&r1=357690&r2=357691&view=diff
==============================================================================
--- lldb/trunk/lit/Modules/Breakpad/discontiguous-sections.test (original)
+++ lldb/trunk/lit/Modules/Breakpad/discontiguous-sections.test Thu Apr  4 06:23:25 2019
@@ -1,7 +1,7 @@
 # Test handling discontiguous sections.
 RUN: lldb-test object-file %p/Inputs/discontiguous-sections.syms -contents | FileCheck %s
 
-CHECK: Showing 5 sections
+CHECK: Showing 6 sections
 
 CHECK:        ID: 0x1
 CHECK-NEXT:   Name: MODULE
@@ -25,3 +25,10 @@ CHECK:        File size: 16
 CHECK-NEXT:   Data:  (
 CHECK-NEXT:       0000: 46494C45 2031202F 746D702F 622E630A                                      |FILE 1 /tmp/b.c.|
 CHECK-NEXT:   )
+
+CHECK:        ID: 0x6
+CHECK-NEXT:   Name: FILE
+CHECK:        File size: 16
+CHECK-NEXT:   Data:  (
+CHECK-NEXT:       0000: 46494C45 2032202F 746D702F 632E630A                                      |FILE 2 /tmp/c.c.|
+CHECK-NEXT:   )

Modified: lldb/trunk/lit/Modules/Breakpad/sections.test
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/lit/Modules/Breakpad/sections.test?rev=357691&r1=357690&r2=357691&view=diff
==============================================================================
--- lldb/trunk/lit/Modules/Breakpad/sections.test (original)
+++ lldb/trunk/lit/Modules/Breakpad/sections.test Thu Apr  4 06:23:25 2019
@@ -73,7 +73,7 @@ CHECK-NEXT:   )
 
 CHECK:        Index: 5
 CHECK-NEXT:   ID: 0x6
-CHECK-NEXT:   Name: STACK
+CHECK-NEXT:   Name: STACK CFI INIT
 CHECK-NEXT:   Type: regular
 CHECK-NEXT:   Permissions: ---
 CHECK-NEXT:   Thread specific: no

Modified: lldb/trunk/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp?rev=357691&r1=357690&r2=357691&view=diff
==============================================================================
--- lldb/trunk/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp (original)
+++ lldb/trunk/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp Thu Apr  4 06:23:25 2019
@@ -16,11 +16,14 @@ using namespace lldb_private;
 using namespace lldb_private::breakpad;
 
 namespace {
-enum class Token { Unknown, Module, Info, CodeID, File, Func, Public, Stack };
+enum class Token { Unknown, Module, Info, CodeID, File, Func, Public, Stack, CFI, Init };
 }
 
-static Token toToken(llvm::StringRef str) {
-  return llvm::StringSwitch<Token>(str)
+template<typename T>
+static T stringTo(llvm::StringRef Str);
+
+template <> Token stringTo<Token>(llvm::StringRef Str) {
+  return llvm::StringSwitch<Token>(Str)
       .Case("MODULE", Token::Module)
       .Case("INFO", Token::Info)
       .Case("CODE_ID", Token::CodeID)
@@ -28,21 +31,25 @@ static Token toToken(llvm::StringRef str
       .Case("FUNC", Token::Func)
       .Case("PUBLIC", Token::Public)
       .Case("STACK", Token::Stack)
+      .Case("CFI", Token::CFI)
+      .Case("INIT", Token::Init)
       .Default(Token::Unknown);
 }
 
-static llvm::Triple::OSType toOS(llvm::StringRef str) {
+template <>
+llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
   using llvm::Triple;
-  return llvm::StringSwitch<Triple::OSType>(str)
+  return llvm::StringSwitch<Triple::OSType>(Str)
       .Case("Linux", Triple::Linux)
       .Case("mac", Triple::MacOSX)
       .Case("windows", Triple::Win32)
       .Default(Triple::UnknownOS);
 }
 
-static llvm::Triple::ArchType toArch(llvm::StringRef str) {
+template <>
+llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
   using llvm::Triple;
-  return llvm::StringSwitch<Triple::ArchType>(str)
+  return llvm::StringSwitch<Triple::ArchType>(Str)
       .Case("arm", Triple::arm)
       .Case("arm64", Triple::aarch64)
       .Case("mips", Triple::mips)
@@ -56,6 +63,13 @@ static llvm::Triple::ArchType toArch(llv
       .Default(Triple::UnknownArch);
 }
 
+template<typename T>
+static T consume(llvm::StringRef &Str) {
+  llvm::StringRef Token;
+  std::tie(Token, Str) = getToken(Str);
+  return stringTo<T>(Token);
+}
+
 /// Return the number of hex digits needed to encode an (POD) object of a given
 /// type.
 template <typename T> static constexpr size_t hex_digits() {
@@ -112,8 +126,8 @@ static UUID parseModuleId(llvm::Triple::
                                                          : sizeof(data.uuid));
 }
 
-Record::Kind Record::classify(llvm::StringRef Line) {
-  Token Tok = toToken(getToken(Line).first);
+llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) {
+  Token Tok = consume<Token>(Line);
   switch (Tok) {
   case Token::Module:
     return Record::Module;
@@ -126,36 +140,45 @@ Record::Kind Record::classify(llvm::Stri
   case Token::Public:
     return Record::Public;
   case Token::Stack:
-    return Record::Stack;
+    Tok = consume<Token>(Line);
+    switch (Tok) {
+    case Token::CFI:
+      Tok = consume<Token>(Line);
+      return Tok == Token::Init ? Record::StackCFIInit : Record::StackCFI;
+    default:
+      return llvm::None;
+    }
 
-  case Token::CodeID:
   case Token::Unknown:
     // Optimistically assume that any unrecognised token means this is a line
     // record, those don't have a special keyword and start directly with a
     // hex number. CODE_ID should never be at the start of a line, but if it
     // is, it can be treated the same way as a garbled line record.
     return Record::Line;
+
+  case Token::CodeID:
+  case Token::CFI:
+  case Token::Init:
+    // These should never appear at the start of a valid record.
+    return llvm::None;
   }
   llvm_unreachable("Fully covered switch above!");
 }
 
 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
-  llvm::StringRef Str;
-  std::tie(Str, Line) = getToken(Line);
-  if (toToken(Str) != Token::Module)
+  if (consume<Token>(Line) != Token::Module)
     return llvm::None;
 
-  std::tie(Str, Line) = getToken(Line);
-  llvm::Triple::OSType OS = toOS(Str);
+  llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
   if (OS == llvm::Triple::UnknownOS)
     return llvm::None;
 
-  std::tie(Str, Line) = getToken(Line);
-  llvm::Triple::ArchType Arch = toArch(Str);
+  llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
   if (Arch == llvm::Triple::UnknownArch)
     return llvm::None;
 
+  llvm::StringRef Str;
   std::tie(Str, Line) = getToken(Line);
   UUID ID = parseModuleId(OS, Str);
   if (!ID)
@@ -173,15 +196,13 @@ llvm::raw_ostream &breakpad::operator<<(
 
 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
-  llvm::StringRef Str;
-  std::tie(Str, Line) = getToken(Line);
-  if (toToken(Str) != Token::Info)
+  if (consume<Token>(Line) != Token::Info)
     return llvm::None;
 
-  std::tie(Str, Line) = getToken(Line);
-  if (toToken(Str) != Token::CodeID)
+  if (consume<Token>(Line) != Token::CodeID)
     return llvm::None;
 
+  llvm::StringRef Str;
   std::tie(Str, Line) = getToken(Line);
   // If we don't have any text following the code ID (e.g. on linux), we should
   // use this as the UUID. Otherwise, we should revert back to the module ID.
@@ -200,11 +221,10 @@ llvm::raw_ostream &breakpad::operator<<(
 
 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
   // FILE number name
-  llvm::StringRef Str;
-  std::tie(Str, Line) = getToken(Line);
-  if (toToken(Str) != Token::File)
+  if (consume<Token>(Line) != Token::File)
     return llvm::None;
 
+  llvm::StringRef Str;
   size_t Number;
   std::tie(Str, Line) = getToken(Line);
   if (!to_integer(Str, Number))
@@ -231,11 +251,10 @@ static bool parsePublicOrFunc(llvm::Stri
 
   Token Tok = Size ? Token::Func : Token::Public;
 
-  llvm::StringRef Str;
-  std::tie(Str, Line) = getToken(Line);
-  if (toToken(Str) != Tok)
+  if (consume<Token>(Line) != Tok)
     return false;
 
+  llvm::StringRef Str;
   std::tie(Str, Line) = getToken(Line);
   Multiple = Str == "m";
 
@@ -354,8 +373,10 @@ llvm::StringRef breakpad::toString(Recor
     return "LINE";
   case Record::Public:
     return "PUBLIC";
-  case Record::Stack:
-    return "STACK";
+  case Record::StackCFIInit:
+    return "STACK CFI INIT";
+  case Record::StackCFI:
+    return "STACK CFI";
   }
   llvm_unreachable("Unknown record kind!");
 }

Modified: lldb/trunk/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h?rev=357691&r1=357690&r2=357691&view=diff
==============================================================================
--- lldb/trunk/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h (original)
+++ lldb/trunk/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h Thu Apr  4 06:23:25 2019
@@ -20,12 +20,12 @@ namespace breakpad {
 
 class Record {
 public:
-  enum Kind { Module, Info, File, Func, Line, Public, Stack };
+  enum Kind { Module, Info, File, Func, Line, Public, StackCFIInit, StackCFI };
 
   /// Attempt to guess the kind of the record present in the argument without
   /// doing a full parse. The returned kind will always be correct for valid
   /// records, but the full parse can still fail in case of corrupted input.
-  static Kind classify(llvm::StringRef Line);
+  static llvm::Optional<Kind> classify(llvm::StringRef Line);
 
 protected:
   Record(Kind K) : TheKind(K) {}

Modified: lldb/trunk/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp?rev=357691&r1=357690&r2=357691&view=diff
==============================================================================
--- lldb/trunk/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp (original)
+++ lldb/trunk/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp Thu Apr  4 06:23:25 2019
@@ -148,11 +148,14 @@ void ObjectFileBreakpad::CreateSections(
     llvm::StringRef line;
     std::tie(line, text) = text.split('\n');
 
-    Record::Kind next_section = Record::classify(line);
+    llvm::Optional<Record::Kind> next_section = Record::classify(line);
     if (next_section == Record::Line) {
       // Line records logically belong to the preceding Func record, so we put
       // them in the same section.
       next_section = Record::Func;
+    } else if (next_section == Record::StackCFI) {
+      // Same goes for StackCFI and StackCFIInit
+      next_section = Record::StackCFIInit;
     }
     if (next_section == current_section)
       continue;

Modified: lldb/trunk/unittests/ObjectFile/Breakpad/BreakpadRecordsTest.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/unittests/ObjectFile/Breakpad/BreakpadRecordsTest.cpp?rev=357691&r1=357690&r2=357691&view=diff
==============================================================================
--- lldb/trunk/unittests/ObjectFile/Breakpad/BreakpadRecordsTest.cpp (original)
+++ lldb/trunk/unittests/ObjectFile/Breakpad/BreakpadRecordsTest.cpp Thu Apr  4 06:23:25 2019
@@ -19,13 +19,18 @@ TEST(Record, classify) {
   EXPECT_EQ(Record::File, Record::classify("FILE"));
   EXPECT_EQ(Record::Func, Record::classify("FUNC"));
   EXPECT_EQ(Record::Public, Record::classify("PUBLIC"));
-  EXPECT_EQ(Record::Stack, Record::classify("STACK"));
+  EXPECT_EQ(Record::StackCFIInit, Record::classify("STACK CFI INIT"));
+  EXPECT_EQ(Record::StackCFI, Record::classify("STACK CFI"));
+
+  // Any obviously incorrect lines will be classified as such.
+  EXPECT_EQ(llvm::None, Record::classify("STACK"));
+  EXPECT_EQ(llvm::None, Record::classify("STACK CODE_ID"));
+  EXPECT_EQ(llvm::None, Record::classify("CODE_ID"));
 
   // Any line which does not start with a known keyword will be classified as a
   // line record, as those are the only ones that start without a keyword.
   EXPECT_EQ(Record::Line, Record::classify("deadbeef"));
   EXPECT_EQ(Record::Line, Record::classify("12"));
-  EXPECT_EQ(Record::Line, Record::classify("CODE_ID"));
 }
 
 TEST(ModuleRecord, parse) {