[llvm] r276686 - Next step along the way to getting good error messages for bad archives.

Kevin Enderby via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 25 13:36:37 PDT 2016


Author: enderby
Date: Mon Jul 25 15:36:36 2016
New Revision: 276686

URL: http://llvm.org/viewvc/llvm-project?rev=276686&view=rev
Log:
Next step along the way to getting good error messages for bad archives.

I consulted with Lang Hames on this work, and the goal was to add a bit
of "where" in the archive the error occurred along with what the error was.

So this step changes ArchiveMemberHeader into a class with a pointer
to the archive header and the parent archive.  Which allows the methods
in the ArchiveMemberHeader to determine which member the header is
for to include that information in the error message.

For this first step the "where" is just the offset to the member in the
archive.  The next step will be a new method on ArchiveMemberHeader
to get the full name, if possible, to be use in the error message.  Which
will now be possible as ArchiveMemberHeader contains a pointer to
the Archive with its string table and its size, etc. so the full name can
be determined from the header if it is valid.

Also this change adds the missing checks the archive header is actually
contained in the buffer and is not truncated, as well as if the terminating
characters are correct in the header.

And changes one error message in Archive::Child::getNext() where the
name or offset to member is now added.

Added:
    llvm/trunk/test/tools/llvm-objdump/Inputs/libbogus4.a
    llvm/trunk/test/tools/llvm-objdump/Inputs/libbogus5.a
Modified:
    llvm/trunk/include/llvm/Object/Archive.h
    llvm/trunk/lib/Object/Archive.cpp
    llvm/trunk/test/tools/llvm-objdump/malformed-archives.test

Modified: llvm/trunk/include/llvm/Object/Archive.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Object/Archive.h?rev=276686&r1=276685&r2=276686&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Object/Archive.h (original)
+++ llvm/trunk/include/llvm/Object/Archive.h Mon Jul 25 15:36:36 2016
@@ -25,14 +25,15 @@
 
 namespace llvm {
 namespace object {
-struct ArchiveMemberHeader {
-  char Name[16];
-  char LastModified[12];
-  char UID[6];
-  char GID[6];
-  char AccessMode[8];
-  char Size[10]; ///< Size of data, not including header or padding.
-  char Terminator[2];
+
+class Archive;
+
+class ArchiveMemberHeader {
+public:
+  friend class Archive;
+  ArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr,
+                      uint64_t Size, Error *Err);
+  // ArchiveMemberHeader() = default;
 
   /// Get the name without looking up long names.
   llvm::StringRef getName() const;
@@ -43,10 +44,29 @@ struct ArchiveMemberHeader {
   sys::fs::perms getAccessMode() const;
   sys::TimeValue getLastModified() const;
   llvm::StringRef getRawLastModified() const {
-    return StringRef(LastModified, sizeof(LastModified)).rtrim(' ');
+    return StringRef(ArMemHdr->LastModified,
+                     sizeof(ArMemHdr->LastModified)).rtrim(' ');
   }
   unsigned getUID() const;
   unsigned getGID() const;
+
+  // This returns the size of the private struct ArMemHdrType
+  uint64_t getSizeOf() const {
+    return sizeof(ArMemHdrType);
+  }
+
+private:
+  struct ArMemHdrType {
+    char Name[16];
+    char LastModified[12];
+    char UID[6];
+    char GID[6];
+    char AccessMode[8];
+    char Size[10]; ///< Size of data, not including header or padding.
+    char Terminator[2];
+  };
+  Archive const *Parent;
+  ArMemHdrType const *ArMemHdr;
 };
 
 class Archive : public Binary {
@@ -55,15 +75,13 @@ public:
   class Child {
     friend Archive;
     const Archive *Parent;
+    friend ArchiveMemberHeader;
+    ArchiveMemberHeader Header;
     /// \brief Includes header but not padding byte.
     StringRef Data;
     /// \brief Offset from Data to the start of the file.
     uint16_t StartOfFile;
 
-    const ArchiveMemberHeader *getHeader() const {
-      return reinterpret_cast<const ArchiveMemberHeader *>(Data.data());
-    }
-
     bool isThinMember() const;
 
   public:
@@ -80,17 +98,17 @@ public:
 
     ErrorOr<StringRef> getName() const;
     ErrorOr<std::string> getFullName() const;
-    StringRef getRawName() const { return getHeader()->getName(); }
+    StringRef getRawName() const { return Header.getName(); }
     sys::TimeValue getLastModified() const {
-      return getHeader()->getLastModified();
+      return Header.getLastModified();
     }
     StringRef getRawLastModified() const {
-      return getHeader()->getRawLastModified();
+      return Header.getRawLastModified();
     }
-    unsigned getUID() const { return getHeader()->getUID(); }
-    unsigned getGID() const { return getHeader()->getGID(); }
+    unsigned getUID() const { return Header.getUID(); }
+    unsigned getGID() const { return Header.getGID(); }
     sys::fs::perms getAccessMode() const {
-      return getHeader()->getAccessMode();
+      return Header.getAccessMode();
     }
     /// \return the size of the archive member without the header or padding.
     Expected<uint64_t> getSize() const;

Modified: llvm/trunk/lib/Object/Archive.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/Archive.cpp?rev=276686&r1=276685&r2=276686&view=diff
==============================================================================
--- llvm/trunk/lib/Object/Archive.cpp (original)
+++ llvm/trunk/lib/Object/Archive.cpp Mon Jul 25 15:36:36 2016
@@ -34,44 +34,90 @@ malformedError(Twine Msg) {
                                         object_error::parse_failed);
 }
 
+ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
+                                         const char *RawHeaderPtr,
+                                         uint64_t Size, Error *Err)
+    : Parent(Parent),
+      ArMemHdr(reinterpret_cast<const ArMemHdrType *>(RawHeaderPtr)) {
+  if (RawHeaderPtr == nullptr)
+    return;
+  ErrorAsOutParameter ErrAsOutParam(Err);
+
+  // TODO: For errors messages with the ArchiveMemberHeader class use the
+  // archive member name instead of the the offset to the archive member header.
+  // When there is also error getting the member name then use the offset to
+  // the member in the message.
+
+  if (Size < sizeof(ArMemHdrType)) {
+    if (Err) {
+      uint64_t Offset = RawHeaderPtr - Parent->getData().data();
+      *Err = malformedError("remaining size of archive too small for next "
+                            "archive member header at offset " +
+                            Twine(Offset));
+    }
+    return;
+  }
+  if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
+    if (Err) {
+      std::string Buf;
+      raw_string_ostream OS(Buf);
+      OS.write_escaped(llvm::StringRef(ArMemHdr->Terminator,
+                                       sizeof(ArMemHdr->Terminator)));
+      OS.flush();
+      uint64_t Offset = RawHeaderPtr - Parent->getData().data();
+      *Err = malformedError("terminator characters in archive member \"" + Buf +
+                            "\" not the correct \"`\\n\" values for the "
+                            "archive member header at offset " + Twine(Offset));
+    }
+    return;
+  }
+}
+
 StringRef ArchiveMemberHeader::getName() const {
   char EndCond;
-  if (Name[0] == '/' || Name[0] == '#')
+  if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
     EndCond = ' ';
   else
     EndCond = '/';
   llvm::StringRef::size_type end =
-      llvm::StringRef(Name, sizeof(Name)).find(EndCond);
+      llvm::StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
   if (end == llvm::StringRef::npos)
-    end = sizeof(Name);
-  assert(end <= sizeof(Name) && end > 0);
+    end = sizeof(ArMemHdr->Name);
+  assert(end <= sizeof(ArMemHdr->Name) && end > 0);
   // Don't include the EndCond if there is one.
-  return llvm::StringRef(Name, end);
+  return llvm::StringRef(ArMemHdr->Name, end);
 }
 
 Expected<uint32_t> ArchiveMemberHeader::getSize() const {
   uint32_t Ret;
-  if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret)) {
+  if (llvm::StringRef(ArMemHdr->Size,
+        sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) {
     std::string Buf;
     raw_string_ostream OS(Buf);
-    OS.write_escaped(llvm::StringRef(Size, sizeof(Size)).rtrim(" "));
+    OS.write_escaped(llvm::StringRef(ArMemHdr->Size,
+                                     sizeof(ArMemHdr->Size)).rtrim(" "));
     OS.flush();
+    uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
+                      Parent->getData().data();
     return malformedError("characters in size field in archive header are not "
-                          "all decimal numbers: '" + Buf + "'");
+                          "all decimal numbers: '" + Buf + "' for archive "
+                          "member header at offset " + Twine(Offset));
   }
   return Ret;
 }
 
 sys::fs::perms ArchiveMemberHeader::getAccessMode() const {
   unsigned Ret;
-  if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(' ').getAsInteger(8, Ret))
+  if (StringRef(ArMemHdr->AccessMode,
+                sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret))
     llvm_unreachable("Access mode is not an octal number.");
   return static_cast<sys::fs::perms>(Ret);
 }
 
 sys::TimeValue ArchiveMemberHeader::getLastModified() const {
   unsigned Seconds;
-  if (StringRef(LastModified, sizeof(LastModified)).rtrim(' ')
+  if (StringRef(ArMemHdr->LastModified,
+                sizeof(ArMemHdr->LastModified)).rtrim(' ')
           .getAsInteger(10, Seconds))
     llvm_unreachable("Last modified time not a decimal number.");
 
@@ -82,7 +128,7 @@ sys::TimeValue ArchiveMemberHeader::getL
 
 unsigned ArchiveMemberHeader::getUID() const {
   unsigned Ret;
-  StringRef User = StringRef(UID, sizeof(UID)).rtrim(' ');
+  StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' ');
   if (User.empty())
     return 0;
   if (User.getAsInteger(10, Ret))
@@ -92,7 +138,7 @@ unsigned ArchiveMemberHeader::getUID() c
 
 unsigned ArchiveMemberHeader::getGID() const {
   unsigned Ret;
-  StringRef Group = StringRef(GID, sizeof(GID)).rtrim(' ');
+  StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' ');
   if (Group.empty())
     return 0;
   if (Group.getAsInteger(10, Ret))
@@ -102,15 +148,23 @@ unsigned ArchiveMemberHeader::getGID() c
 
 Archive::Child::Child(const Archive *Parent, StringRef Data,
                       uint16_t StartOfFile)
-    : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {}
+    : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr),
+      Data(Data), StartOfFile(StartOfFile) {
+}
 
 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
-    : Parent(Parent) {
+    : Parent(Parent), Header(Parent, Start, Parent->getData().size() -
+                             (Start - Parent->getData().data()), Err) {
   if (!Start)
     return;
   ErrorAsOutParameter ErrAsOutParam(Err);
 
-  uint64_t Size = sizeof(ArchiveMemberHeader);
+  // If there was an error in the construction of the Header and we were passed
+  // Err that is not nullptr then just return with the error now set.
+  if (Err && *Err)
+    return;
+
+  uint64_t Size = Header.getSizeOf();
   Data = StringRef(Start, Size);
   if (!isThinMember()) {
     Expected<uint64_t> MemberSize = getRawSize();
@@ -124,7 +178,7 @@ Archive::Child::Child(const Archive *Par
   }
 
   // Setup StartOfFile and PaddingBytes.
-  StartOfFile = sizeof(ArchiveMemberHeader);
+  StartOfFile = Header.getSizeOf();
   // Don't include attached name.
   StringRef Name = getRawName();
   if (Name.startswith("#1/")) {
@@ -137,7 +191,7 @@ Archive::Child::Child(const Archive *Par
 
 Expected<uint64_t> Archive::Child::getSize() const {
   if (Parent->IsThin) {
-    Expected<uint32_t> Size = getHeader()->getSize();
+    Expected<uint32_t> Size = Header.getSize();
     if (!Size)
       return Size.takeError();
     return Size.get();
@@ -146,11 +200,11 @@ Expected<uint64_t> Archive::Child::getSi
 }
 
 Expected<uint64_t> Archive::Child::getRawSize() const {
-  return getHeader()->getSize();
+  return Header.getSize();
 }
 
 bool Archive::Child::isThinMember() const {
-  StringRef Name = getHeader()->getName();
+  StringRef Name = Header.getName();
   return Parent->IsThin && Name != "/" && Name != "//";
 }
 
@@ -200,9 +254,16 @@ Expected<Archive::Child> Archive::Child:
     return Child(Parent, nullptr, nullptr);
 
   // Check to see if this is past the end of the archive.
-  if (NextLoc > Parent->Data.getBufferEnd())
-    return malformedError("offset to next archive member past the end of the "
-                          "archive");
+  if (NextLoc > Parent->Data.getBufferEnd()) {
+    Twine Msg("offset to next archive member past the end of the archive after "
+              "member ");
+    ErrorOr<StringRef> NameOrErr = getName();
+    if (NameOrErr.getError()) {
+      uint64_t Offset = Data.data() - Parent->getData().data();
+      return malformedError(Msg + "at offset " + Twine(Offset));
+    } else
+      return malformedError(Msg + Twine(NameOrErr.get()));
+  }
 
   Error Err;
   Child Ret(Parent, NextLoc, &Err);
@@ -247,7 +308,7 @@ ErrorOr<StringRef> Archive::Child::getNa
     uint64_t name_size;
     if (name.substr(3).rtrim(' ').getAsInteger(10, name_size))
       llvm_unreachable("Long name length is not an ingeter");
-    return Data.substr(sizeof(ArchiveMemberHeader), name_size).rtrim('\0');
+    return Data.substr(Header.getSizeOf(), name_size).rtrim('\0');
   } else {
     // It is not a long name so trim the blanks at the end of the name.
     if (name[name.size() - 1] != '/') {

Added: llvm/trunk/test/tools/llvm-objdump/Inputs/libbogus4.a
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-objdump/Inputs/libbogus4.a?rev=276686&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-objdump/Inputs/libbogus4.a (added)
+++ llvm/trunk/test/tools/llvm-objdump/Inputs/libbogus4.a Mon Jul 25 15:36:36 2016
@@ -0,0 +1,11 @@
+!<arch>
+hello.c         1444941273  124   0     100644  102       `
+#include <stdio.h>
+#include <stdlib.h>
+int
+main()
+{
+	printf("Hello World\n");
+	return EXIT_SUCCESS;
+}
+foo.c           1444941645  124   0     100644

Added: llvm/trunk/test/tools/llvm-objdump/Inputs/libbogus5.a
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-objdump/Inputs/libbogus5.a?rev=276686&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-objdump/Inputs/libbogus5.a (added)
+++ llvm/trunk/test/tools/llvm-objdump/Inputs/libbogus5.a Mon Jul 25 15:36:36 2016
@@ -0,0 +1,10 @@
+!<arch>
+hello.c         1444941273  124   0     100644  102       @
+#include <stdio.h>
+#include <stdlib.h>
+int
+main()
+{
+	printf("Hello World\n");
+	return EXIT_SUCCESS;
+}

Modified: llvm/trunk/test/tools/llvm-objdump/malformed-archives.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-objdump/malformed-archives.test?rev=276686&r1=276685&r2=276686&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-objdump/malformed-archives.test (original)
+++ llvm/trunk/test/tools/llvm-objdump/malformed-archives.test Mon Jul 25 15:36:36 2016
@@ -5,16 +5,28 @@
 # RUN:   %p/Inputs/libbogus1.a \
 # RUN:   2>&1 | FileCheck -check-prefix=bogus1 %s 
 
-# bogus1: libbogus1.a': truncated or malformed archive (characters in size field in archive header are not all decimal numbers: '10%')
+# bogus1: libbogus1.a': truncated or malformed archive (characters in size field in archive header are not all decimal numbers: '10%' for archive member header at offset 8)
 
 # RUN: not llvm-objdump -macho -archive-headers \
 # RUN:   %p/Inputs/libbogus2.a \
 # RUN:   2>&1 | FileCheck -check-prefix=bogus2 %s 
 
-# bogus2: libbogus2.a': truncated or malformed archive (characters in size field in archive header are not all decimal numbers: '1%')
+# bogus2: libbogus2.a': truncated or malformed archive (characters in size field in archive header are not all decimal numbers: '1%' for archive member header at offset 170)
 
 # RUN: not llvm-objdump -macho -archive-headers \
 # RUN:   %p/Inputs/libbogus3.a \
 # RUN:   2>&1 | FileCheck -check-prefix=bogus3 %s 
 
-# bogus3: libbogus3.a': truncated or malformed archive (offset to next archive member past the end of the archive)
+# bogus3: libbogus3.a': truncated or malformed archive (offset to next archive member past the end of the archive after member foo.c)
+
+# RUN: not llvm-objdump -macho -archive-headers \
+# RUN:   %p/Inputs/libbogus4.a \
+# RUN:   2>&1 | FileCheck -check-prefix=bogus4 %s 
+
+# bogus4: libbogus4.a': truncated or malformed archive (remaining size of archive too small for next archive member header at offset 170)
+
+# RUN: not llvm-objdump -macho -archive-headers \
+# RUN:   %p/Inputs/libbogus5.a \
+# RUN:   2>&1 | FileCheck -check-prefix=bogus5 %s 
+
+# bogus5: libbogus5.a': truncated or malformed archive (terminator characters in archive member "@\n" not the correct "`\n" values for the archive member header at offset 8)




More information about the llvm-commits mailing list