[llvm] Segment based binary output format for objcopy (PR #68569)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 9 02:28:44 PDT 2023


https://github.com/quic-subhkedi created https://github.com/llvm/llvm-project/pull/68569

This patch adds support for a new objcopy output format: segment-based
binary output. In this format, multiple output files are created; the
number of files is determined by the number of loadable program headers
in the input ELF, and each file holds the content of the sections mapped
into the corresponding segment.

>From cfcc006a35df49593f9b7a048750b61de606403a Mon Sep 17 00:00:00 2001
From: SUBHAM KEDIA <quic_subhkedi at quicinc.com>
Date: Mon, 9 Oct 2023 14:55:45 +0530
Subject: [PATCH] Segment based binary output format for objcopy

This patch adds support for a new objcopy output format:
segment-based binary output. In this format, multiple
output files are created; the number of files is determined
by the number of loadable program headers in the input ELF,
and each file holds the content of the sections mapped into
the corresponding segment.
---
 llvm/include/llvm/ObjCopy/CommonConfig.h   |  9 +--
 llvm/include/llvm/Object/ELFObjectFile.h   |  9 +++
 llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp        |  2 +
 llvm/lib/ObjCopy/ELF/ELFObject.cpp         | 26 +++++++
 llvm/lib/ObjCopy/ELF/ELFObject.h           | 58 ++++++++++++++-
 llvm/tools/llvm-objcopy/ObjcopyOptions.cpp |  1 +
 llvm/tools/llvm-objcopy/llvm-objcopy.cpp   | 83 +++++++++++++---------
 7 files changed, 149 insertions(+), 39 deletions(-)

diff --git a/llvm/include/llvm/ObjCopy/CommonConfig.h b/llvm/include/llvm/ObjCopy/CommonConfig.h
index e7ce1e6f2c54d75..f1e10a663ab70a2 100644
--- a/llvm/include/llvm/ObjCopy/CommonConfig.h
+++ b/llvm/include/llvm/ObjCopy/CommonConfig.h
@@ -27,12 +27,7 @@
 namespace llvm {
 namespace objcopy {
 
-enum class FileFormat {
-  Unspecified,
-  ELF,
-  Binary,
-  IHex,
-};
+enum class FileFormat { Unspecified, ELF, Binary, IHex, SegBin };
 
 // This type keeps track of the machine info for various architectures. This
 // lets us map architecture names to ELF types and the e_machine value of the
@@ -213,6 +208,8 @@ struct CommonConfig {
   StringRef AddGnuDebugLink;
   // Cached gnu_debuglink's target CRC
   uint32_t GnuDebugLinkCRC32;
+  // Segment Index to be dumped
+  uint32_t SegmentIndex;
   std::optional<StringRef> ExtractPartition;
   StringRef SplitDWO;
   StringRef SymbolsPrefix;
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index dc3d6bb58710c74..ffa17867cabab1e 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -114,6 +114,8 @@ class ELFObjectFileBase : public ObjectFile {
   // corresponding to the section with that index.
   Expected<std::vector<BBAddrMap>>
   readBBAddrMap(std::optional<unsigned> TextSectionIndex = std::nullopt) const;
+
+  virtual uint32_t getProgramHeaderCount() const = 0;
 };
 
 class ELFSectionRef : public SectionRef {
@@ -467,6 +469,8 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase {
   bool isRelocatableObject() const override;
 
   void createFakeSections() { EF.createFakeSections(); }
+
+  uint32_t getProgramHeaderCount() const override;
 };
 
 using ELF32LEObjectFile = ELFObjectFile<ELF32LE>;
@@ -553,6 +557,11 @@ uint64_t ELFObjectFile<ELFT>::getSectionOffset(DataRefImpl Sec) const {
   return getSection(Sec)->sh_offset;
 }
 
+template <class ELFT>
+uint32_t ELFObjectFile<ELFT>::getProgramHeaderCount() const {
+  return EF.getHeader().e_phnum;
+}
+
 template <class ELFT>
 uint64_t ELFObjectFile<ELFT>::getSymbolValueImpl(DataRefImpl Symb) const {
   Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
index 9d02ba051a0a84b..36bc88a1cbd8c18 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
@@ -183,6 +183,8 @@ static std::unique_ptr<Writer> createWriter(const CommonConfig &Config,
     return std::make_unique<BinaryWriter>(Obj, Out);
   case FileFormat::IHex:
     return std::make_unique<IHexWriter>(Obj, Out);
+  case FileFormat::SegBin:
+    return std::make_unique<SegBinWriter>(Obj, Out, Config.SegmentIndex);
   default:
     return createELFWriter(Config, Obj, Out, OutputElfType);
   }
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index eaeef11b127e487..ac1c70714f1c777 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -174,6 +174,13 @@ Error SectionWriter::visit(const Section &Sec) {
   return Error::success();
 }
 
+Error BinarySegmentWriter::visit(const Segment &Seg) {
+  if (Seg.Type == PT_LOAD)
+    llvm::copy(Seg.Contents, Out.getBufferStart());
+
+  return Error::success();
+}
+
 static bool addressOverflows32bit(uint64_t Addr) {
   // Sign extended 32 bit addresses (e.g 0xFFFFFFFF80000000) are ok
   return Addr > UINT32_MAX && Addr + 0x80000000 > UINT32_MAX;
@@ -2679,6 +2686,25 @@ Error BinaryWriter::finalize() {
   return Error::success();
 }
 
+Error SegBinWriter::write() {
+  const Segment &Seg = Obj.getSegmentForIndex(SegmentIndex);
+  if (Error Err = Seg.accept(*SegmentWriter))
+    return Err;
+  Out.write(Buf->getBufferStart(), Buf->getBufferSize());
+  return Error::success();
+}
+
+Error SegBinWriter::finalize() {
+  const Segment &Seg = Obj.getSegmentForIndex(SegmentIndex);
+  Buf = WritableMemoryBuffer::getNewMemBuffer(Seg.FileSize);
+  if (!Buf)
+    return createStringError(errc::not_enough_memory,
+                             "failed to allocate memory buffer of " +
+                                 Twine::utohexstr(Seg.FileSize) + " bytes");
+  SegmentWriter = std::make_unique<BinarySegmentWriter>(*Buf);
+  return Error::success();
+}
+
 bool IHexWriter::SectionCompare::operator()(const SectionBase *Lhs,
                                             const SectionBase *Rhs) const {
   return (sectionPhysicalAddr(Lhs) & 0xFFFFFFFFU) <
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index 89a03b3fe0ee354..802eb3999d5a2e9 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -87,6 +87,13 @@ class SectionVisitor {
   virtual Error visit(const DecompressedSection &Sec) = 0;
 };
 
+class SegmentVisitor {
+public:
+  virtual ~SegmentVisitor() = default;
+
+  virtual Error visit(const Segment &Seg) = 0;
+};
+
 class MutableSectionVisitor {
 public:
   virtual ~MutableSectionVisitor() = default;
@@ -126,6 +133,18 @@ class SectionWriter : public SectionVisitor {
   explicit SectionWriter(WritableMemoryBuffer &Buf) : Out(Buf) {}
 };
 
+class SegmentWriter : public SegmentVisitor {
+protected:
+  WritableMemoryBuffer &Out;
+
+public:
+  virtual ~SegmentWriter() = default;
+
+  Error visit(const Segment &Seg) override = 0;
+
+  explicit SegmentWriter(WritableMemoryBuffer &Buf) : Out(Buf) {}
+};
+
 template <class ELFT> class ELFSectionWriter : public SectionWriter {
 private:
   using Elf_Word = typename ELFT::Word;
@@ -191,6 +210,16 @@ class BinarySectionWriter : public SectionWriter {
       : SectionWriter(Buf) {}
 };
 
+class BinarySegmentWriter : public SegmentWriter {
+public:
+  virtual ~BinarySegmentWriter() {}
+
+  Error visit(const Segment &Seg) override;
+
+  explicit BinarySegmentWriter(WritableMemoryBuffer &Buf)
+      : SegmentWriter(Buf) {}
+};
+
 using IHexLineData = SmallVector<char, 64>;
 
 struct IHexRecord {
@@ -387,6 +416,19 @@ class IHexWriter : public Writer {
   IHexWriter(Object &Obj, raw_ostream &Out) : Writer(Obj, Out) {}
 };
 
+class SegBinWriter : public Writer {
+private:
+  std::unique_ptr<BinarySegmentWriter> SegmentWriter;
+  uint32_t SegmentIndex = 0;
+
+public:
+  ~SegBinWriter() {}
+  Error finalize() override;
+  Error write() override;
+  SegBinWriter(Object &Obj, raw_ostream &Out, uint32_t SegmentIndex)
+      : Writer(Obj, Out), SegmentIndex(SegmentIndex) {}
+};
+
 class SectionBase {
 public:
   std::string Name;
@@ -476,7 +518,7 @@ class Segment {
 
   void removeSection(const SectionBase *Sec) { Sections.erase(Sec); }
   void addSection(const SectionBase *Sec) { Sections.insert(Sec); }
-
+  Error accept(SegmentVisitor &Visitor) const { return Visitor.visit(*this); }
   ArrayRef<uint8_t> getContents() const { return Contents; }
 };
 
@@ -1036,6 +1078,9 @@ class Object {
   static bool sectionIsAlloc(const SectionBase &Sec) {
     return Sec.Flags & ELF::SHF_ALLOC;
   };
+  static bool segmentIsLoadable(const Segment &Seg) {
+    return Seg.Type == ELF::PT_LOAD; // p_type is an enumeration, not flags.
+  };
 
 public:
   template <class T>
@@ -1087,6 +1132,17 @@ class Object {
   }
   SectionTableRef removedSections() { return SectionTableRef(RemovedSections); }
 
+  iterator_range<
+      filter_iterator<pointee_iterator<std::vector<SegPtr>::const_iterator>,
+                      decltype(&segmentIsLoadable)>>
+  loadableSegments() const {
+    return make_filter_range(make_pointee_range(Segments), segmentIsLoadable);
+  }
+
+  const Segment &getSegmentForIndex(uint32_t Index) const {
+    return *(Segments[Index]);
+  }
+
   ConstRange<Segment> segments() const { return make_pointee_range(Segments); }
 
   Error removeSections(bool AllowBrokenLinks,
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
index d33adb0b6a3e478..599fca958f73a35 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
+++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
@@ -688,6 +688,7 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
   Config.OutputFormat = StringSwitch<FileFormat>(OutputFormat)
                             .Case("binary", FileFormat::Binary)
                             .Case("ihex", FileFormat::IHex)
+                            .Case("segbin", FileFormat::SegBin)
                             .Default(FileFormat::Unspecified);
   if (Config.OutputFormat == FileFormat::Unspecified) {
     if (OutputFormat.empty()) {
diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
index 2afa97601f5cfd8..c568a90e7ec75ea 100644
--- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -9,6 +9,7 @@
 #include "ObjcopyOptions.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/ELF.h"
@@ -123,6 +124,7 @@ static Error executeObjcopyOnRawBinary(ConfigManager &ConfigMgr,
   case FileFormat::Binary:
   case FileFormat::IHex:
   case FileFormat::Unspecified:
+  case FileFormat::SegBin:
     Expected<const ELFConfig &> ELFConfig = ConfigMgr.getELFConfig();
     if (!ELFConfig)
       return ELFConfig.takeError();
@@ -133,6 +135,36 @@ static Error executeObjcopyOnRawBinary(ConfigManager &ConfigMgr,
   llvm_unreachable("unsupported output format");
 }
 
+static Error emitOutput(CommonConfig &Config,
+                        FilePermissionsApplier &PermsApplierOrErr,
+                        std::function<Error(raw_ostream &)> ObjcopyFunc) {
+  std::string OutputFilename =
+      Config.OutputFilename.str() + ((Config.OutputFormat == FileFormat::SegBin)
+                                         ? std::to_string(Config.SegmentIndex)
+                                         : "");
+  if (ObjcopyFunc) {
+    if (Config.SplitDWO.size()) {
+      // Strip the .dwo tables from the output written below.
+      Config.ExtractDWO = false;
+      Config.StripDWO = true;
+    }
+    // Apply the transformations described by Config and store the result in
+    // OutputFilename (for segbin, Config.OutputFilename + segment index).
+    if (Error E = writeToOutput(OutputFilename, ObjcopyFunc))
+      return E;
+  }
+  // Runs even when ObjcopyFunc is empty and nothing was written above.
+  if (Error E = PermsApplierOrErr.apply(OutputFilename, Config.PreserveDates))
+    return E;
+
+  if (!Config.SplitDWO.empty())
+    if (Error E = PermsApplierOrErr.apply(Config.SplitDWO, Config.PreserveDates,
+                                          static_cast<sys::fs::perms>(0666)))
+      return E;
+
+  return Error::success();
+}
+
 /// The function executeObjcopy does the higher level dispatch based on the type
 /// of input (raw binary, archive or single object file) and takes care of the
 /// format-agnostic modifications, i.e. preserving dates.
@@ -145,6 +177,7 @@ static Error executeObjcopy(ConfigManager &ConfigMgr) {
     return PermsApplierOrErr.takeError();
 
   std::function<Error(raw_ostream & OutFile)> ObjcopyFunc;
+  uint32_t SegmentCount = 0;
 
   OwningBinary<llvm::object::Binary> BinaryHolder;
   std::unique_ptr<MemoryBuffer> MemoryBufferHolder;
@@ -181,47 +214,33 @@ static Error executeObjcopy(ConfigManager &ConfigMgr) {
         return E;
     } else {
       // Handle llvm::object::Binary.
+      if (ELFObjectFileBase *ElfFile =
+              dyn_cast<ELFObjectFileBase>(BinaryHolder.getBinary()))
+        SegmentCount = ElfFile->getProgramHeaderCount();
+
       ObjcopyFunc = [&](raw_ostream &OutFile) -> Error {
         return executeObjcopyOnBinary(ConfigMgr, *BinaryHolder.getBinary(),
                                       OutFile);
       };
     }
   }
-
-  if (ObjcopyFunc) {
-    if (Config.SplitDWO.empty()) {
-      // Apply transformations described by Config and store result into
-      // Config.OutputFilename using specified ObjcopyFunc function.
-      if (Error E = writeToOutput(Config.OutputFilename, ObjcopyFunc))
-        return E;
-    } else {
-      Config.ExtractDWO = true;
-      Config.StripDWO = false;
-      // Copy .dwo tables from the Config.InputFilename into Config.SplitDWO
-      // file using specified ObjcopyFunc function.
-      if (Error E = writeToOutput(Config.SplitDWO, ObjcopyFunc))
-        return E;
-      Config.ExtractDWO = false;
-      Config.StripDWO = true;
-      // Apply transformations described by Config, remove .dwo tables and
-      // store result into Config.OutputFilename using specified ObjcopyFunc
-      // function.
-      if (Error E = writeToOutput(Config.OutputFilename, ObjcopyFunc))
+  if (Config.SplitDWO.size()) {
+    Config.ExtractDWO = true;
+    Config.StripDWO = false;
+    // Copy .dwo tables from the Config.InputFilename into Config.SplitDWO
+    // file using specified ObjcopyFunc function.
+    if (Error E = writeToOutput(Config.SplitDWO, ObjcopyFunc))
+      return E;
+  }
+  if (Config.OutputFormat == FileFormat::SegBin) {
+    for (uint32_t SegmentIdx = 0; SegmentIdx < SegmentCount; SegmentIdx++) {
+      Config.SegmentIndex = SegmentIdx;
+      if (Error E = emitOutput(Config, PermsApplierOrErr.get(), ObjcopyFunc))
         return E;
     }
+    return Error::success();
   }
-
-  if (Error E =
-          PermsApplierOrErr->apply(Config.OutputFilename, Config.PreserveDates))
-    return E;
-
-  if (!Config.SplitDWO.empty())
-    if (Error E =
-            PermsApplierOrErr->apply(Config.SplitDWO, Config.PreserveDates,
-                                     static_cast<sys::fs::perms>(0666)))
-      return E;
-
-  return Error::success();
+  return emitOutput(Config, PermsApplierOrErr.get(), ObjcopyFunc);
 }
 
 int llvm_objcopy_main(int argc, char **argv, const llvm::ToolContext &) {



More information about the llvm-commits mailing list