[llvm] r340070 - [llvm-objcopy] Add support for -I binary -B <arch>.

Jordan Rupprecht via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 17 11:51:11 PDT 2018


Author: rupprecht
Date: Fri Aug 17 11:51:11 2018
New Revision: 340070

URL: http://llvm.org/viewvc/llvm-project?rev=340070&view=rev
Log:
[llvm-objcopy] Add support for -I binary -B <arch>.

Summary:
The -I (--input-target) and -B (--binary-architecture) flags exist but are currently silently ignored. This adds support for -I binary for architectures i386, x86-64 (and alias i386:x86-64), arm, aarch64, sparc, and ppc (powerpc:common64). This is largely based on D41687.

This is done by implementing an additional subclass of Reader, BinaryReader, which works by interpreting the input file as contents for .data field, sets up a synthetic header, and adds additional sections/symbols (e.g. _binary__tmp_data_txt_start).

Reviewers: jakehehrlich, alexshap, jhenderson, javed.absar

Reviewed By: jhenderson

Subscribers: jyknight, nemanjai, kbarton, fedor.sergeev, jrtc27, kristof.beyls, paulsemel, llvm-commits

Differential Revision: https://reviews.llvm.org/D50343

Added:
    llvm/trunk/test/tools/llvm-objcopy/binary-input-and-output.test
    llvm/trunk/test/tools/llvm-objcopy/binary-input-arch.test
    llvm/trunk/test/tools/llvm-objcopy/binary-input-error.test
    llvm/trunk/test/tools/llvm-objcopy/binary-input.test
Modified:
    llvm/trunk/tools/llvm-objcopy/Object.cpp
    llvm/trunk/tools/llvm-objcopy/Object.h
    llvm/trunk/tools/llvm-objcopy/llvm-objcopy.cpp

Added: llvm/trunk/test/tools/llvm-objcopy/binary-input-and-output.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-objcopy/binary-input-and-output.test?rev=340070&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-objcopy/binary-input-and-output.test (added)
+++ llvm/trunk/test/tools/llvm-objcopy/binary-input-and-output.test Fri Aug 17 11:51:11 2018
@@ -0,0 +1,15 @@
+# RUN: echo abcd > %t.txt
+
+# Preserve input to verify it is not modified
+# RUN: cp %t.txt %t-copy.txt
+
+# -I binary -O binary preserves payload through in-memory representation
+# RUN: llvm-objcopy -I binary -B i386:x86-64 -O binary %t.txt %t.2.txt
+# RUN: cmp %t.txt %t.2.txt
+# RUN: cmp %t.txt %t-copy.txt
+
+# -I binary -O binary preserves payload through an intermediate object file
+# RUN: llvm-objcopy -I binary -B i386:x86-64 %t.txt %t.o
+# RUN: llvm-objcopy -O binary %t.o %t.3.txt
+# RUN: cmp %t.txt %t.3.txt
+# RUN: cmp %t.txt %t-copy.txt

Added: llvm/trunk/test/tools/llvm-objcopy/binary-input-arch.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-objcopy/binary-input-arch.test?rev=340070&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-objcopy/binary-input-arch.test (added)
+++ llvm/trunk/test/tools/llvm-objcopy/binary-input-arch.test Fri Aug 17 11:51:11 2018
@@ -0,0 +1,75 @@
+# RUN: echo abcd > %t.txt
+
+# RUN: llvm-objcopy -I binary -B aarch64 %t.txt %t.aarch64.o
+# RUN: llvm-readobj -file-headers %t.aarch64.o | FileCheck %s --check-prefixes=CHECK,AARCH64,64
+
+# RUN: llvm-objcopy -I binary -B arm %t.txt %t.arm.o
+# RUN: llvm-readobj -file-headers %t.arm.o | FileCheck %s --check-prefixes=CHECK,ARM,32
+
+# RUN: llvm-objcopy -I binary -B i386 %t.txt %t.i386.o
+# RUN: llvm-readobj -file-headers %t.i386.o | FileCheck %s --check-prefixes=CHECK,I386,32
+
+# RUN: llvm-objcopy -I binary -B i386:x86-64 %t.txt %t.i386:x86-64.o
+# RUN: llvm-readobj -file-headers %t.i386:x86-64.o | FileCheck %s --check-prefixes=CHECK,X86-64,64
+
+# RUN: llvm-objcopy -I binary -B powerpc:common64 %t.txt %t.powerpc:common64.o
+# RUN: llvm-readobj -file-headers %t.powerpc:common64.o | FileCheck %s --check-prefixes=CHECK,PPC,64
+
+# RUN: llvm-objcopy -I binary -B sparc %t.txt %t.sparc.o
+# RUN: llvm-readobj -file-headers %t.sparc.o | FileCheck %s --check-prefixes=CHECK,SPARC,32
+
+# RUN: llvm-objcopy -I binary -B x86-64 %t.txt %t.x86-64.o
+# RUN: llvm-readobj -file-headers %t.x86-64.o | FileCheck %s --check-prefixes=CHECK,X86-64,64
+
+# CHECK: Format:
+# AARCH64-SAME: ELF64-aarch64-little
+# ARM-SAME:     ELF32-arm-little
+# I386-SAME:    ELF32-i386
+# PPC-SAME:     ELF64-ppc64
+# SPARC-SAME:   ELF32-sparc
+# X86-64-SAME:  ELF64-x86-64
+
+# AARCH64-NEXT: Arch: aarch64
+# ARM-NEXT:     Arch: arm
+# I386-NEXT:    Arch: i386
+# PPC-NEXT:     Arch: powerpc64le
+# SPARC-NEXT:   Arch: sparcel
+# X86-64-NEXT:  Arch: x86_64
+
+# 32-NEXT:      AddressSize: 32bit
+# 64-NEXT:      AddressSize: 64bit
+
+# CHECK:        ElfHeader {
+# CHECK-NEXT:     Ident {
+# CHECK-NEXT:       Magic: (7F 45 4C 46)
+# 32-NEXT:          Class: 32-bit (0x1)
+# 64-NEXT:          Class: 64-bit (0x2)
+# CHECK-NEXT:       DataEncoding: LittleEndian (0x1)
+# CHECK-NEXT:       FileVersion: 1
+# CHECK-NEXT:       OS/ABI: SystemV (0x0)
+# CHECK-NEXT:       ABIVersion: 0
+# CHECK-NEXT:       Unused: (00 00 00 00 00 00 00)
+# CHECK-NEXT:     }
+# CHECK-NEXT:     Type: Relocatable (0x1)
+# AARCH64-NEXT:   Machine: EM_AARCH64 (0xB7)
+# ARM-NEXT:       Machine: EM_ARM (0x28)
+# I386-NEXT:      Machine: EM_386 (0x3)
+# PPC-NEXT:       Machine: EM_PPC64 (0x15)
+# SPARC-NEXT:     Machine: EM_SPARC (0x2)
+# X86-64-NEXT:    Machine: EM_X86_64 (0x3E)
+# CHECK-NEXT:     Version: 1
+# CHECK-NEXT:     Entry: 0x0
+# CHECK-NEXT:     ProgramHeaderOffset:
+# CHECK-NEXT:     SectionHeaderOffset:
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# 32-NEXT:        HeaderSize: 52
+# 64-NEXT:        HeaderSize: 64
+# 32-NEXT:        ProgramHeaderEntrySize: 32
+# 64-NEXT:        ProgramHeaderEntrySize: 56
+# CHECK-NEXT:     ProgramHeaderCount: 0
+# 32-NEXT:        SectionHeaderEntrySize: 40
+# 64-NEXT:        SectionHeaderEntrySize: 64
+# CHECK-NEXT:     SectionHeaderCount: 4
+# CHECK-NEXT:     StringTableSectionIndex:
+# CHECK-NEXT:   }

Added: llvm/trunk/test/tools/llvm-objcopy/binary-input-error.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-objcopy/binary-input-error.test?rev=340070&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-objcopy/binary-input-error.test (added)
+++ llvm/trunk/test/tools/llvm-objcopy/binary-input-error.test Fri Aug 17 11:51:11 2018
@@ -0,0 +1,10 @@
+# RUN: echo abcd > %t.txt
+
+# RUN: not llvm-objcopy -I binary %t.txt %t.o 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=MISSING-BINARY-ARCH
+
+# RUN: not llvm-objcopy -I binary -B xyz %t.txt %t.o 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=BAD-BINARY-ARCH
+
+# MISSING-BINARY-ARCH: Specified binary input without specifiying an architecture.
+# BAD-BINARY-ARCH: Invalid architecture: 'xyz'.

Added: llvm/trunk/test/tools/llvm-objcopy/binary-input.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-objcopy/binary-input.test?rev=340070&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-objcopy/binary-input.test (added)
+++ llvm/trunk/test/tools/llvm-objcopy/binary-input.test Fri Aug 17 11:51:11 2018
@@ -0,0 +1,112 @@
+# RUN: echo -n abcd > %t.x-txt
+# Preserve input to verify it is not modified
+# RUN: cp %t.x-txt %t-copy.txt
+# RUN: llvm-objcopy -I binary -B i386:x86-64 %t.x-txt %t.o
+# RUN: llvm-readobj -sections -symbols %t.o | FileCheck %s
+# RUN: cmp %t.x-txt %t-copy.txt
+
+# CHECK:      Sections [
+# CHECK-NEXT:   Section {
+# CHECK-NEXT:     Index: 0
+# CHECK-NEXT:     Name:  (0)
+# CHECK-NEXT:     Type: SHT_NULL (0x0)
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset:
+# CHECK-NEXT:     Size:
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 0
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Section {
+# CHECK-NEXT:     Index: 1
+# CHECK-NEXT:     Name: .strtab
+# CHECK-NEXT:     Type: SHT_STRTAB (0x3)
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset:
+# CHECK-NEXT:     Size:
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Section {
+# CHECK-NEXT:     Index: 2
+# CHECK-NEXT:     Name: .symtab
+# CHECK-NEXT:     Type: SHT_SYMTAB (0x2)
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset:
+# CHECK-NEXT:     Size:
+# CHECK-NEXT:     Link: 1
+# CHECK-NEXT:     Info: 1
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 24
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Section {
+# CHECK-NEXT:     Index: 3
+# CHECK-NEXT:     Name: .data
+# CHECK-NEXT:     Type: SHT_PROGBITS (0x1)
+# CHECK-NEXT:     Flags [ (0x3)
+# CHECK-NEXT:       SHF_ALLOC (0x2)
+# CHECK-NEXT:       SHF_WRITE (0x1)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset:
+# CHECK-NEXT:     Size: 4
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:   }
+# CHECK-NEXT: ]
+
+# Note: the symbol names are derived from the full path (with non-alnum values
+# replaced with "_"), e.g. "/tmp/a-b.c" should yield
+# _binary__tmp_a_b_c_{start,end,size}.
+# Just check for _binary_{{[_a-zA-Z0-9]*}}_x_txt_{start,end,size} to avoid
+# making assumptions about how this test is run.
+
+# CHECK:      Symbols [
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name:
+# CHECK-NEXT:     Value: 0x0
+# CHECK-NEXT:     Size: 0
+# CHECK-NEXT:     Binding: Local (0x0)
+# CHECK-NEXT:     Type: None (0x0)
+# CHECK-NEXT:     Other: 0
+# CHECK-NEXT:     Section: Undefined (0x0)
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: _binary_{{[_a-zA-Z0-9]*}}_x_txt_start
+# CHECK-NEXT:     Value: 0x0
+# CHECK-NEXT:     Size: 0
+# CHECK-NEXT:     Binding: Global (0x1)
+# CHECK-NEXT:     Type: None (0x0)
+# CHECK-NEXT:     Other: 0
+# CHECK-NEXT:     Section: .data
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: _binary_{{[_a-zA-Z0-9]*}}_x_txt_end
+# CHECK-NEXT:     Value: 0x4
+# CHECK-NEXT:     Size: 0
+# CHECK-NEXT:     Binding: Global (0x1)
+# CHECK-NEXT:     Type: None (0x0)
+# CHECK-NEXT:     Other: 0
+# CHECK-NEXT:     Section: .data
+# CHECK-NEXT:   }
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: _binary_{{[_a-zA-Z0-9]*}}_x_txt_size
+# CHECK-NEXT:     Value: 0x4
+# CHECK-NEXT:     Size: 0
+# CHECK-NEXT:     Binding: Global (0x1)
+# CHECK-NEXT:     Type: None (0x0)
+# CHECK-NEXT:     Other: 0
+# CHECK-NEXT:     Section: Absolute
+# CHECK-NEXT:   }
+# CHECK-NEXT: ]

Modified: llvm/trunk/tools/llvm-objcopy/Object.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/Object.cpp?rev=340070&r1=340069&r2=340070&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/Object.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/Object.cpp Fri Aug 17 11:51:11 2018
@@ -230,12 +230,12 @@ void SymbolTableSection::assignIndices()
     Sym->Index = Index++;
 }
 
-void SymbolTableSection::addSymbol(StringRef Name, uint8_t Bind, uint8_t Type,
+void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type,
                                    SectionBase *DefinedIn, uint64_t Value,
                                    uint8_t Visibility, uint16_t Shndx,
-                                   uint64_t Sz) {
+                                   uint64_t Size) {
   Symbol Sym;
-  Sym.Name = Name;
+  Sym.Name = Name.str();
   Sym.Binding = Bind;
   Sym.Type = Type;
   Sym.DefinedIn = DefinedIn;
@@ -249,7 +249,7 @@ void SymbolTableSection::addSymbol(Strin
   }
   Sym.Value = Value;
   Sym.Visibility = Visibility;
-  Sym.Size = Sz;
+  Sym.Size = Size;
   Sym.Index = Symbols.size();
   Symbols.emplace_back(llvm::make_unique<Symbol>(Sym));
   Size += this->EntrySize;
@@ -587,6 +587,84 @@ static bool compareSegmentsByPAddr(const
   return A->Index < B->Index;
 }
 
+template <class ELFT> void BinaryELFBuilder<ELFT>::initFileHeader() {
+  Obj->Flags = 0x0;
+  Obj->Type = ET_REL;
+  Obj->Entry = 0x0;
+  Obj->Machine = EMachine;
+  Obj->Version = 1;
+}
+
+template <class ELFT> void BinaryELFBuilder<ELFT>::initHeaderSegment() {
+  Obj->ElfHdrSegment.Index = 0;
+}
+
+template <class ELFT> StringTableSection *BinaryELFBuilder<ELFT>::addStrTab() {
+  auto &StrTab = Obj->addSection<StringTableSection>();
+  StrTab.Name = ".strtab";
+
+  Obj->SectionNames = &StrTab;
+  return &StrTab;
+}
+
+template <class ELFT>
+SymbolTableSection *
+BinaryELFBuilder<ELFT>::addSymTab(StringTableSection *StrTab) {
+  auto &SymTab = Obj->addSection<SymbolTableSection>();
+
+  SymTab.Name = ".symtab";
+  SymTab.Link = StrTab->Index;
+  // TODO: Factor out dependence on ElfType here.
+  SymTab.EntrySize = sizeof(Elf_Sym);
+
+  // The symbol table always needs a null symbol
+  SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0);
+
+  Obj->SymbolTable = &SymTab;
+  return &SymTab;
+}
+
+template <class ELFT>
+void BinaryELFBuilder<ELFT>::addData(SymbolTableSection *SymTab) {
+  auto Data = ArrayRef<uint8_t>(
+      reinterpret_cast<const uint8_t *>(MemBuf->getBufferStart()),
+      MemBuf->getBufferSize());
+  auto &DataSection = Obj->addSection<Section>(Data);
+  DataSection.Name = ".data";
+  DataSection.Type = ELF::SHT_PROGBITS;
+  DataSection.Size = Data.size();
+  DataSection.Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE;
+
+  std::string SanitizedFilename = MemBuf->getBufferIdentifier().str();
+  std::replace_if(std::begin(SanitizedFilename), std::end(SanitizedFilename),
+                  [](char c) { return !isalnum(c); }, '_');
+  Twine Prefix = Twine("_binary_") + SanitizedFilename;
+
+  SymTab->addSymbol(Prefix + "_start", STB_GLOBAL, STT_NOTYPE, &DataSection,
+                    /*Value=*/0, STV_DEFAULT, 0, 0);
+  SymTab->addSymbol(Prefix + "_end", STB_GLOBAL, STT_NOTYPE, &DataSection,
+                    /*Value=*/DataSection.Size, STV_DEFAULT, 0, 0);
+  SymTab->addSymbol(Prefix + "_size", STB_GLOBAL, STT_NOTYPE, nullptr,
+                    /*Value=*/DataSection.Size, STV_DEFAULT, SHN_ABS, 0);
+}
+
+template <class ELFT> void BinaryELFBuilder<ELFT>::initSections() {
+  for (auto &Section : Obj->sections()) {
+    Section.initialize(Obj->sections());
+  }
+}
+
+template <class ELFT> std::unique_ptr<Object> BinaryELFBuilder<ELFT>::build() {
+  initFileHeader();
+  initHeaderSegment();
+  StringTableSection *StrTab = addStrTab();
+  SymbolTableSection *SymTab = addSymTab(StrTab);
+  initSections();
+  addData(SymTab);
+
+  return std::move(Obj);
+}
+
 template <class ELFT> void ELFBuilder<ELFT>::setParentSegment(Segment &Child) {
   for (auto &Parent : Obj.segments()) {
     // Every segment will overlap with itself but we don't want a segment to
@@ -631,15 +709,6 @@ template <class ELFT> void ELFBuilder<EL
   }
 
   auto &ElfHdr = Obj.ElfHdrSegment;
-  // Creating multiple PT_PHDR segments technically is not valid, but PT_LOAD
-  // segments must not overlap, and other types fit even less.
-  ElfHdr.Type = PT_PHDR;
-  ElfHdr.Flags = 0;
-  ElfHdr.OriginalOffset = ElfHdr.Offset = 0;
-  ElfHdr.VAddr = 0;
-  ElfHdr.PAddr = 0;
-  ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr);
-  ElfHdr.Align = 0;
   ElfHdr.Index = Index++;
 
   const auto &Ehdr = *ElfFile.getHeader();
@@ -894,7 +963,6 @@ template <class ELFT> void ELFBuilder<EL
 template <class ELFT> void ELFBuilder<ELFT>::build() {
   const auto &Ehdr = *ElfFile.getHeader();
 
-  std::copy(Ehdr.e_ident, Ehdr.e_ident + 16, Obj.Ident);
   Obj.Type = Ehdr.e_type;
   Obj.Machine = Ehdr.e_machine;
   Obj.Version = Ehdr.e_version;
@@ -926,16 +994,15 @@ Writer::~Writer() {}
 
 Reader::~Reader() {}
 
-ElfType ELFReader::getElfType() const {
-  if (isa<ELFObjectFile<ELF32LE>>(Bin))
-    return ELFT_ELF32LE;
-  if (isa<ELFObjectFile<ELF64LE>>(Bin))
-    return ELFT_ELF64LE;
-  if (isa<ELFObjectFile<ELF32BE>>(Bin))
-    return ELFT_ELF32BE;
-  if (isa<ELFObjectFile<ELF64BE>>(Bin))
-    return ELFT_ELF64BE;
-  llvm_unreachable("Invalid ELFType");
+std::unique_ptr<Object> BinaryReader::create() const {
+  if (MInfo.Is64Bit)
+    return MInfo.IsLittleEndian
+               ? BinaryELFBuilder<ELF64LE>(MInfo.EMachine, MemBuf).build()
+               : BinaryELFBuilder<ELF64BE>(MInfo.EMachine, MemBuf).build();
+  else
+    return MInfo.IsLittleEndian
+               ? BinaryELFBuilder<ELF32LE>(MInfo.EMachine, MemBuf).build()
+               : BinaryELFBuilder<ELF32BE>(MInfo.EMachine, MemBuf).build();
 }
 
 std::unique_ptr<Object> ELFReader::create() const {
@@ -963,11 +1030,24 @@ std::unique_ptr<Object> ELFReader::creat
 template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
   uint8_t *B = Buf.getBufferStart();
   Elf_Ehdr &Ehdr = *reinterpret_cast<Elf_Ehdr *>(B);
-  std::copy(Obj.Ident, Obj.Ident + 16, Ehdr.e_ident);
+  std::fill(Ehdr.e_ident, Ehdr.e_ident + 16, 0);
+  Ehdr.e_ident[EI_MAG0] = 0x7f;
+  Ehdr.e_ident[EI_MAG1] = 'E';
+  Ehdr.e_ident[EI_MAG2] = 'L';
+  Ehdr.e_ident[EI_MAG3] = 'F';
+  Ehdr.e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32;
+  Ehdr.e_ident[EI_DATA] =
+      ELFT::TargetEndianness == support::big ? ELFDATA2MSB : ELFDATA2LSB;
+  Ehdr.e_ident[EI_VERSION] = EV_CURRENT;
+  Ehdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
+  Ehdr.e_ident[EI_ABIVERSION] = 0;
+
   Ehdr.e_type = Obj.Type;
   Ehdr.e_machine = Obj.Machine;
   Ehdr.e_version = Obj.Version;
   Ehdr.e_entry = Obj.Entry;
+  // TODO: Only set phoff when a program header exists, to avoid tools
+  // thinking this is corrupt data.
   Ehdr.e_phoff = Obj.ProgramHdrSegment.Offset;
   Ehdr.e_flags = Obj.Flags;
   Ehdr.e_ehsize = sizeof(Elf_Ehdr);
@@ -1172,6 +1252,17 @@ static uint64_t LayoutSections(Range Sec
   return Offset;
 }
 
+template <class ELFT> void ELFWriter<ELFT>::initEhdrSegment() {
+  auto &ElfHdr = Obj.ElfHdrSegment;
+  ElfHdr.Type = PT_PHDR;
+  ElfHdr.Flags = 0;
+  ElfHdr.OriginalOffset = ElfHdr.Offset = 0;
+  ElfHdr.VAddr = 0;
+  ElfHdr.PAddr = 0;
+  ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr);
+  ElfHdr.Align = 0;
+}
+
 template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
   // We need a temporary list of segments that has a special order to it
   // so that we know that anytime ->ParentSegment is set that segment has
@@ -1263,6 +1354,7 @@ template <class ELFT> void ELFWriter<ELF
       Obj.SectionNames->addString(Section.Name);
     }
 
+  initEhdrSegment();
   // Before we can prepare for layout the indexes need to be finalized.
   uint64_t Index = 0;
   for (auto &Sec : Obj.sections())
@@ -1390,6 +1482,11 @@ void BinaryWriter::finalize() {
 namespace llvm {
 namespace objcopy {
 
+template class BinaryELFBuilder<ELF64LE>;
+template class BinaryELFBuilder<ELF64BE>;
+template class BinaryELFBuilder<ELF32LE>;
+template class BinaryELFBuilder<ELF32BE>;
+
 template class ELFBuilder<ELF64LE>;
 template class ELFBuilder<ELF64BE>;
 template class ELFBuilder<ELF32LE>;

Modified: llvm/trunk/tools/llvm-objcopy/Object.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/Object.h?rev=340070&r1=340069&r2=340070&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/Object.h (original)
+++ llvm/trunk/tools/llvm-objcopy/Object.h Fri Aug 17 11:51:11 2018
@@ -64,6 +64,15 @@ public:
 
 enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE };
 
+// This type keeps track of the machine info for various architectures. This
+// lets us map architecture names to ELF types and the e_machine value of the
+// ELF file.
+struct MachineInfo {
+  uint16_t EMachine;
+  bool Is64Bit;
+  bool IsLittleEndian;
+};
+
 class SectionVisitor {
 public:
   virtual ~SectionVisitor();
@@ -196,6 +205,8 @@ private:
   using Elf_Phdr = typename ELFT::Phdr;
   using Elf_Ehdr = typename ELFT::Ehdr;
 
+  void initEhdrSegment();
+
   void writeEhdr();
   void writePhdr(const Segment &Seg);
   void writeShdr(const SectionBase &Sec);
@@ -440,9 +451,11 @@ protected:
   using SymPtr = std::unique_ptr<Symbol>;
 
 public:
-  void addSymbol(StringRef Name, uint8_t Bind, uint8_t Type,
-                 SectionBase *DefinedIn, uint64_t Value, uint8_t Visibility,
-                 uint16_t Shndx, uint64_t Sz);
+  SymbolTableSection() { Type = ELF::SHT_SYMTAB; }
+
+  void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn,
+                 uint64_t Value, uint8_t Visibility, uint16_t Shndx,
+                 uint64_t Size);
   void prepareForLayout();
   // An 'empty' symbol table still contains a null symbol.
   bool empty() const { return Symbols.size() == 1; }
@@ -626,11 +639,31 @@ using object::ELFFile;
 using object::ELFObjectFile;
 using object::OwningBinary;
 
+template <class ELFT> class BinaryELFBuilder {
+  using Elf_Sym = typename ELFT::Sym;
+
+  uint16_t EMachine;
+  MemoryBuffer *MemBuf;
+  std::unique_ptr<Object> Obj;
+
+  void initFileHeader();
+  void initHeaderSegment();
+  StringTableSection *addStrTab();
+  SymbolTableSection *addSymTab(StringTableSection *StrTab);
+  void addData(SymbolTableSection *SymTab);
+  void initSections();
+
+public:
+  BinaryELFBuilder(uint16_t EM, MemoryBuffer *MB)
+      : EMachine(EM), MemBuf(MB), Obj(llvm::make_unique<Object>()) {}
+
+  std::unique_ptr<Object> build();
+};
+
 template <class ELFT> class ELFBuilder {
 private:
   using Elf_Addr = typename ELFT::Addr;
   using Elf_Shdr = typename ELFT::Shdr;
-  using Elf_Ehdr = typename ELFT::Ehdr;
   using Elf_Word = typename ELFT::Word;
 
   const ELFFile<ELFT> &ElfFile;
@@ -650,11 +683,20 @@ public:
   void build();
 };
 
+class BinaryReader : public Reader {
+  const MachineInfo &MInfo;
+  MemoryBuffer *MemBuf;
+
+public:
+  BinaryReader(const MachineInfo &MI, MemoryBuffer *MB)
+      : MInfo(MI), MemBuf(MB) {}
+  std::unique_ptr<Object> create() const override;
+};
+
 class ELFReader : public Reader {
   Binary *Bin;
 
 public:
-  ElfType getElfType() const;
   std::unique_ptr<Object> create() const override;
   explicit ELFReader(Binary *B) : Bin(B) {}
 };
@@ -685,7 +727,6 @@ public:
   Segment ElfHdrSegment;
   Segment ProgramHdrSegment;
 
-  uint8_t Ident[16];
   uint64_t Entry;
   uint64_t SHOffset;
   uint32_t Type;
@@ -711,6 +752,7 @@ public:
     auto Sec = llvm::make_unique<T>(std::forward<Ts>(Args)...);
     auto Ptr = Sec.get();
     Sections.emplace_back(std::move(Sec));
+    Ptr->Index = Sections.size();
     return *Ptr;
   }
   Segment &addSegment(ArrayRef<uint8_t> Data) {

Modified: llvm/trunk/tools/llvm-objcopy/llvm-objcopy.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/llvm-objcopy.cpp?rev=340070&r1=340069&r2=340070&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/llvm-objcopy.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/llvm-objcopy.cpp Fri Aug 17 11:51:11 2018
@@ -34,6 +34,7 @@
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/Memory.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/WithColor.h"
@@ -128,41 +129,51 @@ struct SectionRename {
 };
 
 struct CopyConfig {
-  StringRef OutputFilename;
+  // Main input/output options
   StringRef InputFilename;
-  StringRef OutputFormat;
   StringRef InputFormat;
-  StringRef BinaryArch;
+  StringRef OutputFilename;
+  StringRef OutputFormat;
 
-  StringRef SplitDWO;
+  // Only applicable for --input-format=Binary
+  MachineInfo BinaryArch;
+
+  // Advanced options
   StringRef AddGnuDebugLink;
+  StringRef SplitDWO;
   StringRef SymbolsPrefix;
-  std::vector<StringRef> ToRemove;
-  std::vector<StringRef> Keep;
-  std::vector<StringRef> OnlyKeep;
+
+  // Repeated options
   std::vector<StringRef> AddSection;
   std::vector<StringRef> DumpSection;
-  std::vector<StringRef> SymbolsToLocalize;
+  std::vector<StringRef> Keep;
+  std::vector<StringRef> OnlyKeep;
   std::vector<StringRef> SymbolsToGlobalize;
-  std::vector<StringRef> SymbolsToWeaken;
-  std::vector<StringRef> SymbolsToRemove;
   std::vector<StringRef> SymbolsToKeep;
+  std::vector<StringRef> SymbolsToLocalize;
+  std::vector<StringRef> SymbolsToRemove;
+  std::vector<StringRef> SymbolsToWeaken;
+  std::vector<StringRef> ToRemove;
+
+  // Map options
   StringMap<SectionRename> SectionsToRename;
   StringMap<StringRef> SymbolsToRename;
+
+  // Boolean options
+  bool DiscardAll = false;
+  bool ExtractDWO = false;
+  bool KeepFileSymbols = false;
+  bool LocalizeHidden = false;
+  bool OnlyKeepDebug = false;
+  bool PreserveDates = false;
   bool StripAll = false;
   bool StripAllGNU = false;
+  bool StripDWO = false;
   bool StripDebug = false;
-  bool StripSections = false;
   bool StripNonAlloc = false;
-  bool StripDWO = false;
+  bool StripSections = false;
   bool StripUnneeded = false;
-  bool ExtractDWO = false;
-  bool LocalizeHidden = false;
   bool Weaken = false;
-  bool DiscardAll = false;
-  bool OnlyKeepDebug = false;
-  bool KeepFileSymbols = false;
-  bool PreserveDates = false;
 };
 
 using SectionPred = std::function<bool(const SectionBase &Sec)>;
@@ -295,6 +306,45 @@ static bool onlyKeepDWOPred(const Object
   return !isDWOSection(Sec);
 }
 
+static const StringMap<MachineInfo> ArchMap{
+    // Name, {EMachine, 64bit, LittleEndian}
+    {"aarch64", {EM_AARCH64, true, true}},
+    {"arm", {EM_ARM, false, true}},
+    {"i386", {EM_386, false, true}},
+    {"i386:x86-64", {EM_X86_64, true, true}},
+    {"powerpc:common64", {EM_PPC64, true, true}},
+    {"sparc", {EM_SPARC, false, true}},
+    {"x86-64", {EM_X86_64, true, true}},
+};
+
+static const MachineInfo &getMachineInfo(StringRef Arch) {
+  auto Iter = ArchMap.find(Arch);
+  if (Iter == std::end(ArchMap))
+    error("Invalid architecture: '" + Arch + "'");
+  return Iter->getValue();
+}
+
+static ElfType getOutputElfType(const Binary &Bin) {
+  // Infer output ELF type from the input ELF object
+  if (isa<ELFObjectFile<ELF32LE>>(Bin))
+    return ELFT_ELF32LE;
+  if (isa<ELFObjectFile<ELF64LE>>(Bin))
+    return ELFT_ELF64LE;
+  if (isa<ELFObjectFile<ELF32BE>>(Bin))
+    return ELFT_ELF32BE;
+  if (isa<ELFObjectFile<ELF64BE>>(Bin))
+    return ELFT_ELF64BE;
+  llvm_unreachable("Invalid ELFType");
+}
+
+static ElfType getOutputElfType(const MachineInfo &MI) {
+  // Infer output ELF type from the binary arch specified
+  if (MI.Is64Bit)
+    return MI.IsLittleEndian ? ELFT_ELF64LE : ELFT_ELF64BE;
+  else
+    return MI.IsLittleEndian ? ELFT_ELF32LE : ELFT_ELF32BE;
+}
+
 static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
                                             Object &Obj, Buffer &Buf,
                                             ElfType OutputElfType) {
@@ -603,15 +653,14 @@ static void handleArgs(const CopyConfig
     Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink);
 }
 
-static void executeElfObjcopyOnBinary(const CopyConfig &Config, Binary &Binary,
-                                      Buffer &Out) {
-  ELFReader Reader(&Binary);
+static void executeElfObjcopyOnBinary(const CopyConfig &Config, Reader &Reader,
+                                      Buffer &Out, ElfType OutputElfType) {
   std::unique_ptr<Object> Obj = Reader.create();
 
-  handleArgs(Config, *Obj, Reader, Reader.getElfType());
+  handleArgs(Config, *Obj, Reader, OutputElfType);
 
   std::unique_ptr<Writer> Writer =
-      createWriter(Config, *Obj, Out, Reader.getElfType());
+      createWriter(Config, *Obj, Out, OutputElfType);
   Writer->finalize();
   Writer->write();
 }
@@ -653,12 +702,15 @@ static void executeElfObjcopyOnArchive(c
     Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
     if (!ChildOrErr)
       reportError(Ar.getFileName(), ChildOrErr.takeError());
+    Binary *Bin = ChildOrErr->get();
+
     Expected<StringRef> ChildNameOrErr = Child.getName();
     if (!ChildNameOrErr)
       reportError(Ar.getFileName(), ChildNameOrErr.takeError());
 
     MemBuffer MB(ChildNameOrErr.get());
-    executeElfObjcopyOnBinary(Config, **ChildOrErr, MB);
+    ELFReader Reader(Bin);
+    executeElfObjcopyOnBinary(Config, Reader, MB, getOutputElfType(*Bin));
 
     Expected<NewArchiveMember> Member =
         NewArchiveMember::getOldMember(Child, true);
@@ -698,16 +750,29 @@ static void executeElfObjcopy(const Copy
     if (auto EC = sys::fs::status(Config.InputFilename, Stat))
       reportError(Config.InputFilename, EC);
 
-  Expected<OwningBinary<llvm::object::Binary>> BinaryOrErr =
-      createBinary(Config.InputFilename);
-  if (!BinaryOrErr)
-    reportError(Config.InputFilename, BinaryOrErr.takeError());
+  if (Config.InputFormat == "binary") {
+    auto BufOrErr = MemoryBuffer::getFile(Config.InputFilename);
+    if (!BufOrErr)
+      reportError(Config.InputFilename, BufOrErr.getError());
 
-  if (Archive *Ar = dyn_cast<Archive>(BinaryOrErr.get().getBinary())) {
-    executeElfObjcopyOnArchive(Config, *Ar);
-  } else {
     FileBuffer FB(Config.OutputFilename);
-    executeElfObjcopyOnBinary(Config, *BinaryOrErr.get().getBinary(), FB);
+    BinaryReader Reader(Config.BinaryArch, BufOrErr->get());
+    executeElfObjcopyOnBinary(Config, Reader, FB,
+                              getOutputElfType(Config.BinaryArch));
+  } else {
+    Expected<OwningBinary<llvm::object::Binary>> BinaryOrErr =
+        createBinary(Config.InputFilename);
+    if (!BinaryOrErr)
+      reportError(Config.InputFilename, BinaryOrErr.takeError());
+
+    if (Archive *Ar = dyn_cast<Archive>(BinaryOrErr.get().getBinary())) {
+      executeElfObjcopyOnArchive(Config, *Ar);
+    } else {
+      FileBuffer FB(Config.OutputFilename);
+      Binary *Bin = BinaryOrErr.get().getBinary();
+      ELFReader Reader(Bin);
+      executeElfObjcopyOnBinary(Config, Reader, FB, getOutputElfType(*Bin));
+    }
   }
 
   if (Config.PreserveDates) {
@@ -755,7 +820,12 @@ static CopyConfig parseObjcopyOptions(Ar
   Config.OutputFilename = Positional[Positional.size() == 1 ? 0 : 1];
   Config.InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target);
   Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target);
-  Config.BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture);
+  if (Config.InputFormat == "binary") {
+    auto BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture);
+    if (BinaryArch.empty())
+      error("Specified binary input without specifiying an architecture");
+    Config.BinaryArch = getMachineInfo(BinaryArch);
+  }
 
   Config.SplitDWO = InputArgs.getLastArgValue(OBJCOPY_split_dwo);
   Config.AddGnuDebugLink = InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink);




More information about the llvm-commits mailing list