[llvm] 2164c54 - [AIX] Support of Big archive (read)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 17 09:00:49 PST 2022


Author: zhijian
Date: 2022-01-17T11:59:54-05:00
New Revision: 2164c54315bb3351a5e22336c12b1eeb2bbf6821

URL: https://github.com/llvm/llvm-project/commit/2164c54315bb3351a5e22336c12b1eeb2bbf6821
DIFF: https://github.com/llvm/llvm-project/commit/2164c54315bb3351a5e22336c12b1eeb2bbf6821.diff

LOG: [AIX] Support of Big archive (read)

Summary:

The patch is based on EGuesnet's implementation of "Support of Big archive (read)";
the first commit of the patch comes from https://reviews.llvm.org/D100651.

The remaining commits of the patch:

1  Addressed the comments on https://reviews.llvm.org/D100651
2  According to https://www.ibm.com/docs/en/aix/7.2?topic=formats-ar-file-format-big,
using "fl_fstmoff" to find the first object file, using "char ar_nxtmem[20]" to get the next object file,
and using "char fl_lstmoff[20]" to find the last object file fixes the following problems:
   2.1 Archive files that have padding data between two object files cannot be read correctly
   2.2 Archive files from which some object files have been deleted cannot be read correctly

3  Introduced a new derived class, BigArchive, for big ar files.

Reviewers: James Henderson
Differential Revision: https://reviews.llvm.org/D111889

Added: 
    llvm/test/Object/Inputs/aix-big-archive.a
    llvm/test/Object/archive-big-extract.test
    llvm/test/Object/archive-big-print.test
    llvm/test/Object/archive-big-read.test

Modified: 
    llvm/include/llvm/Object/Archive.h
    llvm/lib/Object/Archive.cpp
    llvm/lib/Object/ArchiveWriter.cpp
    llvm/test/tools/llvm-objdump/malformed-archives.test
    llvm/tools/llvm-ar/llvm-ar.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Object/Archive.h b/llvm/include/llvm/Object/Archive.h
index 5b024c7baebcc..5a5fc90f18bdb 100644
--- a/llvm/include/llvm/Object/Archive.h
+++ b/llvm/include/llvm/Object/Archive.h
@@ -32,50 +32,127 @@
 namespace llvm {
 namespace object {
 
+const char ArchiveMagic[] = "!<arch>\n";
+const char ThinArchiveMagic[] = "!<thin>\n";
+const char BigArchiveMagic[] = "<bigaf>\n";
+
 class Archive;
 
-class ArchiveMemberHeader {
+class AbstractArchiveMemberHeader {
+protected:
+  AbstractArchiveMemberHeader(const Archive *Parent) : Parent(Parent){};
+
 public:
   friend class Archive;
-
-  ArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr,
-                      uint64_t Size, Error *Err);
-  // ArchiveMemberHeader() = default;
+  virtual std::unique_ptr<AbstractArchiveMemberHeader> clone() const = 0;
+  virtual ~AbstractArchiveMemberHeader(){};
 
   /// Get the name without looking up long names.
-  Expected<StringRef> getRawName() const;
+  virtual Expected<StringRef> getRawName() const = 0;
+  virtual StringRef getRawAccessMode() const = 0;
+  virtual StringRef getRawLastModified() const = 0;
+  virtual StringRef getRawUID() const = 0;
+  virtual StringRef getRawGID() const = 0;
 
   /// Get the name looking up long names.
-  Expected<StringRef> getName(uint64_t Size) const;
+  virtual Expected<StringRef> getName(uint64_t Size) const = 0;
+  virtual Expected<uint64_t> getSize() const = 0;
+  virtual uint64_t getOffset() const = 0;
 
-  Expected<uint64_t> getSize() const;
+  /// Get next file member location.
+  virtual Expected<const char *> getNextChildLoc() const = 0;
+  virtual Expected<bool> isThin() const = 0;
 
   Expected<sys::fs::perms> getAccessMode() const;
   Expected<sys::TimePoint<std::chrono::seconds>> getLastModified() const;
+  Expected<unsigned> getUID() const;
+  Expected<unsigned> getGID() const;
+
+  /// Returns the size in bytes of the format-defined member header of the
+  /// concrete archive type.
+  virtual uint64_t getSizeOf() const = 0;
+
+  const Archive *Parent;
+};
+
+template <typename T>
+class CommonArchiveMemberHeader : public AbstractArchiveMemberHeader {
+public:
+  CommonArchiveMemberHeader(const Archive *Parent, const T *RawHeaderPtr)
+      : AbstractArchiveMemberHeader(Parent), ArMemHdr(RawHeaderPtr){};
+  StringRef getRawAccessMode() const override;
+  StringRef getRawLastModified() const override;
+  StringRef getRawUID() const override;
+  StringRef getRawGID() const override;
+
+  uint64_t getOffset() const override;
+  uint64_t getSizeOf() const override { return sizeof(T); }
+
+  T const *ArMemHdr;
+};
 
-  StringRef getRawLastModified() const {
-    return StringRef(ArMemHdr->LastModified, sizeof(ArMemHdr->LastModified))
-        .rtrim(' ');
+struct UnixArMemHdrType {
+  char Name[16];
+  char LastModified[12];
+  char UID[6];
+  char GID[6];
+  char AccessMode[8];
+  char Size[10]; ///< Size of data, not including header or padding.
+  char Terminator[2];
+};
+
+class ArchiveMemberHeader : public CommonArchiveMemberHeader<UnixArMemHdrType> {
+public:
+  ArchiveMemberHeader(const Archive *Parent, const char *RawHeaderPtr,
+                      uint64_t Size, Error *Err);
+
+  std::unique_ptr<AbstractArchiveMemberHeader> clone() const override {
+    return std::make_unique<ArchiveMemberHeader>(*this);
   }
 
-  Expected<unsigned> getUID() const;
-  Expected<unsigned> getGID() const;
+  Expected<StringRef> getRawName() const override;
 
-  // This returns the size of the private struct ArMemHdrType
-  uint64_t getSizeOf() const { return sizeof(ArMemHdrType); }
+  Expected<StringRef> getName(uint64_t Size) const override;
+  Expected<uint64_t> getSize() const override;
+  Expected<const char *> getNextChildLoc() const override;
+  Expected<bool> isThin() const override;
+};
 
-private:
-  struct ArMemHdrType {
-    char Name[16];
-    char LastModified[12];
-    char UID[6];
-    char GID[6];
-    char AccessMode[8];
-    char Size[10]; ///< Size of data, not including header or padding.
+// File Member Header
+struct BigArMemHdrType {
+  char Size[20];       // File member size in decimal
+  char NextOffset[20]; // Next member offset in decimal
+  char PrevOffset[20]; // Previous member offset in decimal
+  char LastModified[12];
+  char UID[12];
+  char GID[12];
+  char AccessMode[12];
+  char NameLen[4]; // File member name length in decimal
+  union {
+    char Name[2]; // Start of member name
     char Terminator[2];
   };
-  Archive const *Parent;
-  ArMemHdrType const *ArMemHdr;
+};
+
+// Define file member header of AIX big archive.
+class BigArchiveMemberHeader
+    : public CommonArchiveMemberHeader<BigArMemHdrType> {
+
+public:
+  BigArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr,
+                         uint64_t Size, Error *Err);
+  std::unique_ptr<AbstractArchiveMemberHeader> clone() const override {
+    return std::make_unique<BigArchiveMemberHeader>(*this);
+  }
+
+  Expected<StringRef> getRawName() const override;
+  Expected<uint64_t> getRawNameSize() const;
+
+  Expected<StringRef> getName(uint64_t Size) const override;
+  Expected<uint64_t> getSize() const override;
+  Expected<const char *> getNextChildLoc() const override;
+  Expected<uint64_t> getNextOffset() const;
+  Expected<bool> isThin() const override { return false; }
 };
 
 class Archive : public Binary {
@@ -84,10 +161,10 @@ class Archive : public Binary {
 public:
   class Child {
     friend Archive;
-    friend ArchiveMemberHeader;
+    friend AbstractArchiveMemberHeader;
 
     const Archive *Parent;
-    ArchiveMemberHeader Header;
+    std::unique_ptr<AbstractArchiveMemberHeader> Header;
     /// Includes header but not padding byte.
     StringRef Data;
     /// Offset from Data to the start of the file.
@@ -99,6 +176,44 @@ class Archive : public Binary {
     Child(const Archive *Parent, const char *Start, Error *Err);
     Child(const Archive *Parent, StringRef Data, uint16_t StartOfFile);
 
+    Child(const Child &C)
+        : Parent(C.Parent), Data(C.Data), StartOfFile(C.StartOfFile) {
+      if (C.Header)
+        Header = C.Header->clone();
+    }
+
+    Child(Child &&C) {
+      Parent = std::move(C.Parent);
+      Header = std::move(C.Header);
+      Data = C.Data;
+      StartOfFile = C.StartOfFile;
+    }
+
+    Child &operator=(Child &&C) noexcept {
+      if (&C == this)
+        return *this;
+
+      Parent = std::move(C.Parent);
+      Header = std::move(C.Header);
+      Data = C.Data;
+      StartOfFile = C.StartOfFile;
+
+      return *this;
+    }
+
+    Child &operator=(const Child &C) {
+      if (&C == this)
+        return *this;
+
+      Parent = C.Parent;
+      if (C.Header)
+        Header = C.Header->clone();
+      Data = C.Data;
+      StartOfFile = C.StartOfFile;
+
+      return *this;
+    }
+
     bool operator==(const Child &other) const {
       assert(!Parent || !other.Parent || Parent == other.Parent);
       return Data.begin() == other.Data.begin();
@@ -109,19 +224,21 @@ class Archive : public Binary {
 
     Expected<StringRef> getName() const;
     Expected<std::string> getFullName() const;
-    Expected<StringRef> getRawName() const { return Header.getRawName(); }
+    Expected<StringRef> getRawName() const { return Header->getRawName(); }
 
     Expected<sys::TimePoint<std::chrono::seconds>> getLastModified() const {
-      return Header.getLastModified();
+      return Header->getLastModified();
     }
 
-    StringRef getRawLastModified() const { return Header.getRawLastModified(); }
+    StringRef getRawLastModified() const {
+      return Header->getRawLastModified();
+    }
 
-    Expected<unsigned> getUID() const { return Header.getUID(); }
-    Expected<unsigned> getGID() const { return Header.getGID(); }
+    Expected<unsigned> getUID() const { return Header->getUID(); }
+    Expected<unsigned> getGID() const { return Header->getGID(); }
 
     Expected<sys::fs::perms> getAccessMode() const {
-      return Header.getAccessMode();
+      return Header->getAccessMode();
     }
 
     /// \return the size of the archive member without the header or padding.
@@ -218,7 +335,7 @@ class Archive : public Binary {
   /// Size field is 10 decimal digits long
   static const uint64_t MaxMemberSize = 9999999999;
 
-  enum Kind { K_GNU, K_GNU64, K_BSD, K_DARWIN, K_DARWIN64, K_COFF };
+  enum Kind { K_GNU, K_GNU64, K_BSD, K_DARWIN, K_DARWIN64, K_COFF, K_AIXBIG };
 
   Kind kind() const { return (Kind)Format; }
   bool isThin() const { return IsThin; }
@@ -236,7 +353,6 @@ class Archive : public Binary {
     return make_range(symbol_begin(), symbol_end());
   }
 
-  // Cast methods.
   static bool classof(Binary const *v) { return v->isArchive(); }
 
   // check if a symbol is in the archive
@@ -247,24 +363,55 @@ class Archive : public Binary {
   StringRef getSymbolTable() const { return SymbolTable; }
   StringRef getStringTable() const { return StringTable; }
   uint32_t getNumberOfSymbols() const;
+  virtual uint64_t getFirstChildOffset() const { return getArchiveMagicLen(); }
 
   std::vector<std::unique_ptr<MemoryBuffer>> takeThinBuffers() {
     return std::move(ThinBuffers);
   }
 
+  std::unique_ptr<AbstractArchiveMemberHeader>
+  createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size,
+                            Error *Err) const;
+
+protected:
+  uint64_t getArchiveMagicLen() const;
+  void setFirstRegular(const Child &C);
+
 private:
   StringRef SymbolTable;
   StringRef StringTable;
 
   StringRef FirstRegularData;
   uint16_t FirstRegularStartOfFile = -1;
-  void setFirstRegular(const Child &C);
 
   unsigned Format : 3;
   unsigned IsThin : 1;
   mutable std::vector<std::unique_ptr<MemoryBuffer>> ThinBuffers;
 };
 
+class BigArchive : public Archive {
+  /// Fixed-Length Header.
+  struct FixLenHdr {
+    char Magic[sizeof(BigArchiveMagic) - 1]; ///< Big archive magic string.
+    char MemOffset[20];                      ///< Offset to member table.
+    char GlobSymOffset[20];                  ///< Offset to global symbol table.
+    char
+        GlobSym64Offset[20]; ///< Offset global symbol table for 64-bit objects.
+    char FirstChildOffset[20]; ///< Offset to first archive member.
+    char LastChildOffset[20];  ///< Offset to last archive member.
+    char FreeOffset[20];       ///< Offset to first mem on free list.
+  };
+
+  const FixLenHdr *ArFixLenHdr;
+  uint64_t FirstChildOffset = 0;
+  uint64_t LastChildOffset = 0;
+
+public:
+  BigArchive(MemoryBufferRef Source, Error &Err);
+  uint64_t getFirstChildOffset() const override { return FirstChildOffset; }
+  uint64_t getLastChildOffset() const { return LastChildOffset; }
+};
+
 } // end namespace object
 } // end namespace llvm
 

diff  --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 5492692445e75..35cba1a554630 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
@@ -38,9 +39,6 @@ using namespace llvm;
 using namespace object;
 using namespace llvm::support::endian;
 
-const char Magic[] = "!<arch>\n";
-const char ThinMagic[] = "!<thin>\n";
-
 void Archive::anchor() {}
 
 static Error malformedError(Twine Msg) {
@@ -49,27 +47,59 @@ static Error malformedError(Twine Msg) {
                                         object_error::parse_failed);
 }
 
+static Error
+createMemberHeaderParseError(const AbstractArchiveMemberHeader *ArMemHeader,
+                             const char *RawHeaderPtr, uint64_t Size) {
+  StringRef Msg("remaining size of archive too small for next archive "
+                "member header ");
+
+  Expected<StringRef> NameOrErr = ArMemHeader->getName(Size);
+  if (NameOrErr)
+    return malformedError(Msg + "for " + *NameOrErr);
+
+  consumeError(NameOrErr.takeError());
+  uint64_t Offset = RawHeaderPtr - ArMemHeader->Parent->getData().data();
+  return malformedError(Msg + "at offset " + Twine(Offset));
+}
+
+template <class T, std::size_t N>
+StringRef getFieldRawString(const T (&Field)[N]) {
+  return StringRef(Field, N).rtrim(" ");
+}
+
+template <class T>
+StringRef CommonArchiveMemberHeader<T>::getRawAccessMode() const {
+  return getFieldRawString(ArMemHdr->AccessMode);
+}
+
+template <class T>
+StringRef CommonArchiveMemberHeader<T>::getRawLastModified() const {
+  return getFieldRawString(ArMemHdr->LastModified);
+}
+
+template <class T> StringRef CommonArchiveMemberHeader<T>::getRawUID() const {
+  return getFieldRawString(ArMemHdr->UID);
+}
+
+template <class T> StringRef CommonArchiveMemberHeader<T>::getRawGID() const {
+  return getFieldRawString(ArMemHdr->GID);
+}
+
+template <class T> uint64_t CommonArchiveMemberHeader<T>::getOffset() const {
+  return reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
+}
+
 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
                                          const char *RawHeaderPtr,
                                          uint64_t Size, Error *Err)
-    : Parent(Parent),
-      ArMemHdr(reinterpret_cast<const ArMemHdrType *>(RawHeaderPtr)) {
+    : CommonArchiveMemberHeader<UnixArMemHdrType>(
+          Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) {
   if (RawHeaderPtr == nullptr)
     return;
   ErrorAsOutParameter ErrAsOutParam(Err);
 
-  if (Size < sizeof(ArMemHdrType)) {
-    if (Err) {
-      std::string Msg("remaining size of archive too small for next archive "
-                      "member header ");
-      Expected<StringRef> NameOrErr = getName(Size);
-      if (!NameOrErr) {
-        consumeError(NameOrErr.takeError());
-        uint64_t Offset = RawHeaderPtr - Parent->getData().data();
-        *Err = malformedError(Msg + "at offset " + Twine(Offset));
-      } else
-        *Err = malformedError(Msg + "for " + NameOrErr.get());
-    }
+  if (Size < getSizeOf()) {
+    *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size);
     return;
   }
   if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
@@ -94,6 +124,19 @@ ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
   }
 }
 
+BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent,
+                                               const char *RawHeaderPtr,
+                                               uint64_t Size, Error *Err)
+    : CommonArchiveMemberHeader<BigArMemHdrType>(
+          Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) {
+  if (RawHeaderPtr == nullptr)
+    return;
+  ErrorAsOutParameter ErrAsOutParam(Err);
+
+  if (Size < getSizeOf())
+    *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size);
+}
+
 // This gets the raw name from the ArMemHdr->Name field and checks that it is
 // valid for the kind of archive.  If it is not valid it returns an Error.
 Expected<StringRef> ArchiveMemberHeader::getRawName() const {
@@ -121,7 +164,69 @@ Expected<StringRef> ArchiveMemberHeader::getRawName() const {
   return StringRef(ArMemHdr->Name, end);
 }
 
-// This gets the name looking up long names. Size is the size of the archive
+Expected<uint64_t>
+getArchiveMemberDecField(Twine FieldName, const StringRef RawField,
+                         const Archive *Parent,
+                         const AbstractArchiveMemberHeader *MemHeader) {
+  uint64_t Value;
+  if (RawField.getAsInteger(10, Value)) {
+    uint64_t Offset = MemHeader->getOffset();
+    return malformedError("characters in " + FieldName +
+                          " field in archive member header are not "
+                          "all decimal numbers: '" +
+                          RawField +
+                          "' for the archive "
+                          "member header at offset " +
+                          Twine(Offset));
+  }
+  return Value;
+}
+
+Expected<uint64_t>
+getArchiveMemberOctField(Twine FieldName, const StringRef RawField,
+                         const Archive *Parent,
+                         const AbstractArchiveMemberHeader *MemHeader) {
+  uint64_t Value;
+  if (RawField.getAsInteger(8, Value)) {
+    uint64_t Offset = MemHeader->getOffset();
+    return malformedError("characters in " + FieldName +
+                          " field in archive member header are not "
+                          "all octal numbers: '" +
+                          RawField +
+                          "' for the archive "
+                          "member header at offset " +
+                          Twine(Offset));
+  }
+  return Value;
+}
+
+Expected<StringRef> BigArchiveMemberHeader::getRawName() const {
+  Expected<uint64_t> NameLenOrErr = getArchiveMemberDecField(
+      "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this);
+  if (!NameLenOrErr)
+    // TODO: Out-of-line.
+    return NameLenOrErr.takeError();
+  uint64_t NameLen = NameLenOrErr.get();
+
+  // If the name length is odd, pad with '\0' to get an even length. After
+  // padding, there is the name terminator "`\n".
+  uint64_t NameLenWithPadding = alignTo(NameLen, 2);
+  StringRef NameTerminator = "`\n";
+  StringRef NameStringWithNameTerminator =
+      StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size());
+  if (!NameStringWithNameTerminator.endswith(NameTerminator)) {
+    uint64_t Offset =
+        reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) -
+        Parent->getData().data();
+    // TODO: Out-of-line.
+    return malformedError(
+        "name does not have name terminator \"`\\n\" for archive member"
+        "header at offset " +
+        Twine(Offset));
+  }
+  return StringRef(ArMemHdr->Name, NameLen);
+}
+
 // member including the header, so the size of any name following the header
 // is checked to make sure it does not overflow.
 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
@@ -129,7 +234,7 @@ Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
   // This can be called from the ArchiveMemberHeader constructor when the
   // archive header is truncated to produce an error message with the name.
   // Make sure the name field is not truncated.
-  if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
+  if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
     uint64_t ArchiveOffset =
         reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
     return malformedError("archive header truncated before the name field "
@@ -224,126 +329,133 @@ Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
   return Name.drop_back(1);
 }
 
+Expected<StringRef> BigArchiveMemberHeader::getName(uint64_t Size) const {
+  return getRawName();
+}
+
 Expected<uint64_t> ArchiveMemberHeader::getSize() const {
-  uint64_t Ret;
-  if (StringRef(ArMemHdr->Size, sizeof(ArMemHdr->Size))
-          .rtrim(" ")
-          .getAsInteger(10, Ret)) {
-    std::string Buf;
-    raw_string_ostream OS(Buf);
-    OS.write_escaped(
-        StringRef(ArMemHdr->Size, sizeof(ArMemHdr->Size)).rtrim(" "));
-    OS.flush();
-    uint64_t Offset =
-        reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
-    return malformedError("characters in size field in archive header are not "
-                          "all decimal numbers: '" +
-                          Buf +
-                          "' for archive "
-                          "member header at offset " +
-                          Twine(Offset));
-  }
-  return Ret;
+  return getArchiveMemberDecField("size", getFieldRawString(ArMemHdr->Size),
+                                  Parent, this);
 }
 
-Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const {
-  unsigned Ret;
-  if (StringRef(ArMemHdr->AccessMode, sizeof(ArMemHdr->AccessMode))
-          .rtrim(' ')
-          .getAsInteger(8, Ret)) {
-    std::string Buf;
-    raw_string_ostream OS(Buf);
-    OS.write_escaped(
-        StringRef(ArMemHdr->AccessMode, sizeof(ArMemHdr->AccessMode))
-            .rtrim(" "));
-    OS.flush();
-    uint64_t Offset =
-        reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
-    return malformedError("characters in AccessMode field in archive header "
-                          "are not all decimal numbers: '" +
-                          Buf + "' for the archive member header at offset " +
-                          Twine(Offset));
-  }
-  return static_cast<sys::fs::perms>(Ret);
+Expected<uint64_t> BigArchiveMemberHeader::getSize() const {
+  Expected<uint64_t> SizeOrErr = getArchiveMemberDecField(
+      "size", getFieldRawString(ArMemHdr->Size), Parent, this);
+  if (!SizeOrErr)
+    return SizeOrErr.takeError();
+
+  Expected<uint64_t> NameLenOrErr = getRawNameSize();
+  if (!NameLenOrErr)
+    return NameLenOrErr.takeError();
+
+  return *SizeOrErr + alignTo(*NameLenOrErr, 2);
+}
+
+Expected<uint64_t> BigArchiveMemberHeader::getRawNameSize() const {
+  return getArchiveMemberDecField(
+      "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this);
+}
+
+Expected<uint64_t> BigArchiveMemberHeader::getNextOffset() const {
+  return getArchiveMemberDecField(
+      "NextOffset", getFieldRawString(ArMemHdr->NextOffset), Parent, this);
+}
+
+Expected<sys::fs::perms> AbstractArchiveMemberHeader::getAccessMode() const {
+  Expected<uint64_t> AccessModeOrErr =
+      getArchiveMemberOctField("AccessMode", getRawAccessMode(), Parent, this);
+  if (!AccessModeOrErr)
+    return AccessModeOrErr.takeError();
+  return static_cast<sys::fs::perms>(*AccessModeOrErr);
 }
 
 Expected<sys::TimePoint<std::chrono::seconds>>
-ArchiveMemberHeader::getLastModified() const {
-  unsigned Seconds;
-  if (StringRef(ArMemHdr->LastModified, sizeof(ArMemHdr->LastModified))
-          .rtrim(' ')
-          .getAsInteger(10, Seconds)) {
-    std::string Buf;
-    raw_string_ostream OS(Buf);
-    OS.write_escaped(
-        StringRef(ArMemHdr->LastModified, sizeof(ArMemHdr->LastModified))
-            .rtrim(" "));
-    OS.flush();
-    uint64_t Offset =
-        reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
-    return malformedError("characters in LastModified field in archive header "
-                          "are not all decimal numbers: '" +
-                          Buf + "' for the archive member header at offset " +
-                          Twine(Offset));
-  }
+AbstractArchiveMemberHeader::getLastModified() const {
+  Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField(
+      "LastModified", getRawLastModified(), Parent, this);
+
+  if (!SecondsOrErr)
+    return SecondsOrErr.takeError();
 
-  return sys::toTimePoint(Seconds);
+  return sys::toTimePoint(*SecondsOrErr);
 }
 
-Expected<unsigned> ArchiveMemberHeader::getUID() const {
-  unsigned Ret;
-  StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' ');
+Expected<unsigned> AbstractArchiveMemberHeader::getUID() const {
+  StringRef User = getRawUID();
   if (User.empty())
     return 0;
-  if (User.getAsInteger(10, Ret)) {
-    std::string Buf;
-    raw_string_ostream OS(Buf);
-    OS.write_escaped(User);
-    OS.flush();
-    uint64_t Offset =
-        reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
-    return malformedError("characters in UID field in archive header "
-                          "are not all decimal numbers: '" +
-                          Buf + "' for the archive member header at offset " +
-                          Twine(Offset));
-  }
-  return Ret;
+  return getArchiveMemberDecField("UID", User, Parent, this);
 }
 
-Expected<unsigned> ArchiveMemberHeader::getGID() const {
-  unsigned Ret;
-  StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' ');
+Expected<unsigned> AbstractArchiveMemberHeader::getGID() const {
+  StringRef Group = getRawGID();
   if (Group.empty())
     return 0;
-  if (Group.getAsInteger(10, Ret)) {
-    std::string Buf;
-    raw_string_ostream OS(Buf);
-    OS.write_escaped(Group);
-    OS.flush();
-    uint64_t Offset =
-        reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
-    return malformedError("characters in GID field in archive header "
-                          "are not all decimal numbers: '" +
-                          Buf + "' for the archive member header at offset " +
-                          Twine(Offset));
+  return getArchiveMemberDecField("GID", Group, Parent, this);
+}
+
+Expected<bool> ArchiveMemberHeader::isThin() const {
+  Expected<StringRef> NameOrErr = getRawName();
+  if (!NameOrErr)
+    return NameOrErr.takeError();
+  StringRef Name = NameOrErr.get();
+  return Parent->isThin() && Name != "/" && Name != "//" && Name != "/SYM64/";
+}
+
+Expected<const char *> ArchiveMemberHeader::getNextChildLoc() const {
+  uint64_t Size = getSizeOf();
+  Expected<bool> isThinOrErr = isThin();
+  if (!isThinOrErr)
+    return isThinOrErr.takeError();
+
+  bool isThin = isThinOrErr.get();
+  if (!isThin) {
+    Expected<uint64_t> MemberSize = getSize();
+    if (!MemberSize)
+      return MemberSize.takeError();
+
+    Size += MemberSize.get();
   }
-  return Ret;
+
+  // If Size is odd, add 1 to make it even.
+  const char *NextLoc =
+      reinterpret_cast<const char *>(ArMemHdr) + alignTo(Size, 2);
+
+  if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd())
+    return nullptr;
+
+  return NextLoc;
+}
+
+Expected<const char *> BigArchiveMemberHeader::getNextChildLoc() const {
+  if (getOffset() ==
+      static_cast<const BigArchive *>(Parent)->getLastChildOffset())
+    return nullptr;
+
+  Expected<uint64_t> NextOffsetOrErr = getNextOffset();
+  if (!NextOffsetOrErr)
+    return NextOffsetOrErr.takeError();
+  return Parent->getData().data() + NextOffsetOrErr.get();
 }
 
 Archive::Child::Child(const Archive *Parent, StringRef Data,
                       uint16_t StartOfFile)
-    : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr),
-      Data(Data), StartOfFile(StartOfFile) {}
+    : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {
+  Header = Parent->createArchiveMemberHeader(Data.data(), Data.size(), nullptr);
+}
 
 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
-    : Parent(Parent),
-      Header(Parent, Start,
-             Parent
-                 ? Parent->getData().size() - (Start - Parent->getData().data())
-                 : 0,
-             Err) {
-  if (!Start)
+    : Parent(Parent) {
+  if (!Start) {
+    Header = nullptr;
     return;
+  }
+
+  Header = Parent->createArchiveMemberHeader(
+      Start,
+      Parent ? Parent->getData().size() - (Start - Parent->getData().data())
+             : 0,
+      Err);
 
   // If we are pointed to real data, Start is not a nullptr, then there must be
   // a non-null Err pointer available to report malformed data on.  Only in
@@ -358,7 +470,7 @@ Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
   if (*Err)
     return;
 
-  uint64_t Size = Header.getSizeOf();
+  uint64_t Size = Header->getSizeOf();
   Data = StringRef(Start, Size);
   Expected<bool> isThinOrErr = isThinMember();
   if (!isThinOrErr) {
@@ -377,7 +489,7 @@ Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
   }
 
   // Setup StartOfFile and PaddingBytes.
-  StartOfFile = Header.getSizeOf();
+  StartOfFile = Header->getSizeOf();
   // Don't include attached name.
   Expected<StringRef> NameOrErr = getRawName();
   if (!NameOrErr) {
@@ -385,17 +497,20 @@ Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
     return;
   }
   StringRef Name = NameOrErr.get();
-  if (Name.startswith("#1/")) {
+
+  if (Parent->kind() == Archive::K_AIXBIG) {
+    // The actual start of the file is after the name and any necessary
+    // even-alignment padding.
+    StartOfFile += ((Name.size() + 1) >> 1) << 1;
+  } else if (Name.startswith("#1/")) {
     uint64_t NameSize;
-    if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) {
-      std::string Buf;
-      raw_string_ostream OS(Buf);
-      OS.write_escaped(Name.substr(3).rtrim(' '));
-      OS.flush();
+    StringRef RawNameSize = Name.substr(3).rtrim(' ');
+    if (RawNameSize.getAsInteger(10, NameSize)) {
       uint64_t Offset = Start - Parent->getData().data();
       *Err = malformedError("long name length characters after the #1/ are "
                             "not all decimal numbers: '" +
-                            Buf + "' for archive member header at offset " +
+                            RawNameSize +
+                            "' for archive member header at offset " +
                             Twine(Offset));
       return;
     }
@@ -405,21 +520,15 @@ Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
 
 Expected<uint64_t> Archive::Child::getSize() const {
   if (Parent->IsThin)
-    return Header.getSize();
+    return Header->getSize();
   return Data.size() - StartOfFile;
 }
 
 Expected<uint64_t> Archive::Child::getRawSize() const {
-  return Header.getSize();
+  return Header->getSize();
 }
 
-Expected<bool> Archive::Child::isThinMember() const {
-  Expected<StringRef> NameOrErr = Header.getRawName();
-  if (!NameOrErr)
-    return NameOrErr.takeError();
-  StringRef Name = NameOrErr.get();
-  return Parent->IsThin && Name != "/" && Name != "//" && Name != "/SYM64/";
-}
+Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); }
 
 Expected<std::string> Archive::Child::getFullName() const {
   Expected<bool> isThin = isThinMember();
@@ -462,15 +571,14 @@ Expected<StringRef> Archive::Child::getBuffer() const {
 }
 
 Expected<Archive::Child> Archive::Child::getNext() const {
-  size_t SpaceToSkip = Data.size();
-  // If it's odd, add 1 to make it even.
-  if (SpaceToSkip & 1)
-    ++SpaceToSkip;
+  Expected<const char *> NextLocOrErr = Header->getNextChildLoc();
+  if (!NextLocOrErr)
+    return NextLocOrErr.takeError();
 
-  const char *NextLoc = Data.data() + SpaceToSkip;
+  const char *NextLoc = *NextLocOrErr;
 
   // Check to see if this is at the end of the archive.
-  if (NextLoc == Parent->Data.getBufferEnd())
+  if (NextLoc == nullptr)
     return Child(nullptr, nullptr, nullptr);
 
   // Check to see if this is past the end of the archive.
@@ -505,7 +613,8 @@ Expected<StringRef> Archive::Child::getName() const {
   if (!RawSizeOrErr)
     return RawSizeOrErr.takeError();
   uint64_t RawSize = RawSizeOrErr.get();
-  Expected<StringRef> NameOrErr = Header.getName(Header.getSizeOf() + RawSize);
+  Expected<StringRef> NameOrErr =
+      Header->getName(Header->getSizeOf() + RawSize);
   if (!NameOrErr)
     return NameOrErr.takeError();
   StringRef Name = NameOrErr.get();
@@ -537,12 +646,39 @@ Archive::Child::getAsBinary(LLVMContext *Context) const {
 
 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
   Error Err = Error::success();
-  std::unique_ptr<Archive> Ret(new Archive(Source, Err));
+  std::unique_ptr<Archive> Ret;
+  StringRef Buffer = Source.getBuffer();
+
+  if (Buffer.startswith(BigArchiveMagic))
+    Ret = std::make_unique<BigArchive>(Source, Err);
+  else
+    Ret = std::make_unique<Archive>(Source, Err);
+
   if (Err)
     return std::move(Err);
   return std::move(Ret);
 }
 
+std::unique_ptr<AbstractArchiveMemberHeader>
+Archive::createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size,
+                                   Error *Err) const {
+  ErrorAsOutParameter ErrAsOutParam(Err);
+  if (kind() == K_AIXBIG) // AIX big archives use their own header class.
+    return std::make_unique<BigArchiveMemberHeader>(this, RawHeaderPtr, Size,
+                                                    Err);
+  return std::make_unique<ArchiveMemberHeader>(this, RawHeaderPtr, Size, Err);
+}
+
+// Returns sizeof - 1 of the magic constant matching this archive's format.
+uint64_t Archive::getArchiveMagicLen() const {
+  if (isThin())
+    return sizeof(ThinArchiveMagic) - 1;
+  // Must call kind(): `Kind()` is a value-initialized enum, not this archive.
+  if (kind() == K_AIXBIG)
+    return sizeof(BigArchiveMagic) - 1;
+  return sizeof(ArchiveMagic) - 1;
+}
+
 void Archive::setFirstRegular(const Child &C) {
   FirstRegularData = C.Data;
   FirstRegularStartOfFile = C.StartOfFile;
@@ -553,10 +689,14 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
   ErrorAsOutParameter ErrAsOutParam(&Err);
   StringRef Buffer = Data.getBuffer();
   // Check for sufficient magic.
-  if (Buffer.startswith(ThinMagic)) {
+  if (Buffer.startswith(ThinArchiveMagic)) {
     IsThin = true;
-  } else if (Buffer.startswith(Magic)) {
+  } else if (Buffer.startswith(ArchiveMagic)) {
+    IsThin = false;
+  } else if (Buffer.startswith(BigArchiveMagic)) {
+    Format = K_AIXBIG;
     IsThin = false;
+    return;
   } else {
     Err = make_error<GenericBinaryError>("file too small to be an archive",
                                          object_error::invalid_file_type);
@@ -788,7 +928,7 @@ Archive::child_iterator Archive::child_begin(Error &Err,
     return child_iterator::itr(
         Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
 
-  const char *Loc = Data.getBufferStart() + strlen(Magic);
+  const char *Loc = Data.getBufferStart() + getFirstChildOffset();
   Child C(this, Loc, &Err);
   if (Err)
     return child_end();
@@ -997,6 +1137,39 @@ Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
 }
 
 // Returns true if archive file contains no member file.
-bool Archive::isEmpty() const { return Data.getBufferSize() == 8; }
+bool Archive::isEmpty() const {
+  return Data.getBufferSize() == getArchiveMagicLen();
+}
 
 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
+
+// Reads the first/last member offsets from the fixed-length header and caches
+// the first member, if any. Every failure is reported through Err.
+BigArchive::BigArchive(MemoryBufferRef Source, Error &Err)
+    : Archive(Source, Err) {
+  ErrorAsOutParameter ErrAsOutParam(&Err);
+  ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Data.getBufferStart());
+
+  StringRef RawOffset = getFieldRawString(ArFixLenHdr->FirstChildOffset);
+  if (RawOffset.getAsInteger(10, FirstChildOffset)) {
+    // TODO: Out-of-line.
+    Err = malformedError("malformed AIX big archive: first member offset \"" +
+                         RawOffset + "\" is not a number");
+    return; // Later assignments to Err must not overwrite this error.
+  }
+
+  RawOffset = getFieldRawString(ArFixLenHdr->LastChildOffset);
+  if (RawOffset.getAsInteger(10, LastChildOffset)) {
+    // TODO: Out-of-line.
+    Err = malformedError("malformed AIX big archive: last member offset \"" +
+                         RawOffset + "\" is not a number");
+    return; // Report the malformed header instead of iterating members.
+  }
+
+  child_iterator I = child_begin(Err, false);
+  if (Err)
+    return;
+  if (I != child_end())
+    setFirstRegular(*I);
+  Err = Error::success();
+}

diff  --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index da8bcec7f3d42..053b3dafed957 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -137,6 +137,7 @@ static bool isBSDLike(object::Archive::Kind Kind) {
   case object::Archive::K_DARWIN:
   case object::Archive::K_DARWIN64:
     return true;
+  case object::Archive::K_AIXBIG:
   case object::Archive::K_COFF:
     break;
   }
@@ -199,6 +200,7 @@ static bool is64BitKind(object::Archive::Kind Kind) {
   case object::Archive::K_BSD:
   case object::Archive::K_DARWIN:
   case object::Archive::K_COFF:
+  case object::Archive::K_AIXBIG:
     return false;
   case object::Archive::K_DARWIN64:
   case object::Archive::K_GNU64:

diff  --git a/llvm/test/Object/Inputs/aix-big-archive.a b/llvm/test/Object/Inputs/aix-big-archive.a
new file mode 100644
index 0000000000000..d5463a1fea90d
Binary files /dev/null and b/llvm/test/Object/Inputs/aix-big-archive.a differ

diff  --git a/llvm/test/Object/archive-big-extract.test b/llvm/test/Object/archive-big-extract.test
new file mode 100644
index 0000000000000..a1d7f0c731c08
--- /dev/null
+++ b/llvm/test/Object/archive-big-extract.test
@@ -0,0 +1,5 @@
+## Test extract xcoff object file from AIX big archive.
+# RUN: rm -rf %t && mkdir -p %t/extracted/ && cd %t/extracted/
+# RUN: llvm-ar x %p/Inputs/aix-big-archive.a
+# RUN: echo "content_of_evenlen" > evenlen_1
+# RUN: cmp evenlen evenlen_1

diff  --git a/llvm/test/Object/archive-big-print.test b/llvm/test/Object/archive-big-print.test
new file mode 100644
index 0000000000000..23af93479ea8c
--- /dev/null
+++ b/llvm/test/Object/archive-big-print.test
@@ -0,0 +1,3 @@
+## Test printing an archive created by AIX ar (Big Archive).
+# RUN: llvm-ar p %p/Inputs/aix-big-archive.a evenlen | FileCheck %s --implicit-check-not={{.}}
+# CHECK: content_of_evenlen

diff  --git a/llvm/test/Object/archive-big-read.test b/llvm/test/Object/archive-big-read.test
new file mode 100644
index 0000000000000..0bb157affb905
--- /dev/null
+++ b/llvm/test/Object/archive-big-read.test
@@ -0,0 +1,5 @@
+## Test reading an AIX big archive member list.
+# RUN: env TZ=GMT llvm-ar tv %p/Inputs/aix-big-archive.a | FileCheck %s --strict-whitespace --implicit-check-not={{.}}
+
+# CHECK:       rw-r--r-- 550591/1000499      7 Jan  5 17:33 2022 oddlen
+# CHECK-NEXT:  rw-r--r-- 550591/1000499     19 Jan  5 17:33 2022 evenlen

diff  --git a/llvm/test/tools/llvm-objdump/malformed-archives.test b/llvm/test/tools/llvm-objdump/malformed-archives.test
index 6701f4809176f..3f2ce8650dbd4 100644
--- a/llvm/test/tools/llvm-objdump/malformed-archives.test
+++ b/llvm/test/tools/llvm-objdump/malformed-archives.test
@@ -13,7 +13,7 @@
 # RUN: not llvm-objdump --macho --archive-headers %t.libbogus1b.a 2>&1 | \
 # RUN:   FileCheck -check-prefix=BOGUS1 -DVAL=10% -DOFFSET=68 -DFILE=%t.libbogus1b.a %s
 
-# BOGUS1: '[[FILE]]': truncated or malformed archive (characters in size field in archive header are not all decimal numbers: '[[VAL]]' for archive member header at offset [[OFFSET]])
+# BOGUS1: '[[FILE]]': truncated or malformed archive (characters in size field in archive member header are not all decimal numbers: '[[VAL]]' for the archive member header at offset [[OFFSET]])
 
 --- !Arch
 Members:
@@ -128,7 +128,7 @@ Members:
 # RUN: not llvm-objdump --macho --archive-headers \
 # RUN:   %t.libbogus10.a 2>&1 | FileCheck -check-prefix=BOGUS10 -DFILE=%t.libbogus10.a %s 
 
-# BOGUS10: [[FILE]](hello.c): truncated or malformed archive (characters in UID field in archive header are not all decimal numbers: '~97&' for the archive member header at offset 8)
+# BOGUS10: [[FILE]](hello.c): truncated or malformed archive (characters in UID field in archive member header are not all decimal numbers: '~97&' for the archive member header at offset 8)
 
 --- !Arch
 Members:
@@ -141,20 +141,20 @@ Members:
 # RUN: not llvm-objdump --macho --archive-headers \
 # RUN:   %t.libbogus11.a 2>&1 | FileCheck -check-prefix=BOGUS11 -DFILE=%t.libbogus11.a %s 
 
-# BOGUS11: [[FILE]](hello.c): truncated or malformed archive (characters in GID field in archive header are not all decimal numbers: '#55!' for the archive member header at offset 8)
+# BOGUS11: [[FILE]](hello.c): truncated or malformed archive (characters in GID field in archive member header are not all decimal numbers: '#55!' for the archive member header at offset 8)
 
 --- !Arch
 Members:
   - Name: hello.c
     GID:  '#55!'
 
-## Check we report an error when the characters in the AccessMode field of a member header are not all decimal numbers.
+## Check we report an error when the characters in the AccessMode field of a member header are not all octal numbers.
 
 # RUN: yaml2obj --docnum=12 %s -o %t.libbogus12.a
 # RUN: not llvm-objdump --macho --archive-headers \
 # RUN:   %t.libbogus12.a 2>&1 | FileCheck -check-prefix=BOGUS12 -DFILE=%t.libbogus12.a %s 
 
-# BOGUS12: [[FILE]](hello.c): truncated or malformed archive (characters in AccessMode field in archive header are not all decimal numbers: 'Feed' for the archive member header at offset 8)
+# BOGUS12: [[FILE]](hello.c): truncated or malformed archive (characters in AccessMode field in archive member header are not all octal numbers: 'Feed' for the archive member header at offset 8)
 
 --- !Arch
 Members:
@@ -177,4 +177,6 @@ Members:
 # RUN: not llvm-ar tv %t.libbogus13.a 2>&1 | \
 # RUN:   FileCheck -check-prefix=BOGUS13B %s 
 
-# BOGUS13B: error: truncated or malformed archive (characters in LastModified field in archive header are not all decimal numbers: '1foobar273' for the archive member header at offset 8)
+# BOGUS13B: error: truncated or malformed archive (characters in LastModified field in archive member header are not all decimal numbers: '1foobar273' for the archive member header at offset 8)
+
+## TODO: add testing for AIX Big archive.

diff  --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp
index 674f57812df4e..f7b29b8840277 100644
--- a/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -1003,12 +1003,17 @@ static int performOperation(ArchiveOperation Operation,
     fail("unable to open '" + ArchiveName + "': " + EC.message());
 
   if (!EC) {
-    Error Err = Error::success();
-    object::Archive Archive(Buf.get()->getMemBufferRef(), Err);
-    failIfError(std::move(Err), "unable to load '" + ArchiveName + "'");
-    if (Archive.isThin())
+    Expected<std::unique_ptr<object::Archive>> ArchiveOrError =
+        object::Archive::create(Buf.get()->getMemBufferRef());
+    if (!ArchiveOrError)
+      failIfError(ArchiveOrError.takeError(),
+                  "unable to load '" + ArchiveName + "'");
+
+    std::unique_ptr<object::Archive> Archive = std::move(ArchiveOrError.get());
+    if (Archive->isThin())
       CompareFullPath = true;
-    performOperation(Operation, &Archive, std::move(Buf.get()), NewMembers);
+    performOperation(Operation, Archive.get(), std::move(Buf.get()),
+                     NewMembers);
     return 0;
   }
 


        


More information about the llvm-commits mailing list