[llvm-commits] [llvm] r79739 - in /llvm/trunk: include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp lib/MC/MCMachOStreamer.cpp

Daniel Dunbar daniel at zuster.org
Sat Aug 22 03:13:24 PDT 2009


Author: ddunbar
Date: Sat Aug 22 05:13:24 2009
New Revision: 79739

URL: http://llvm.org/viewvc/llvm-project?rev=79739&view=rev
Log:
llvm-mc/Mach-O: Sketch symbol table support.
 - The only .s syntax this honors right now is emitting labels, and some parts
   of the symbol table generation are wrong or faked.

 - This is enough to get nm to report such symbols... incorrectly, but still.

Also, fixed byte emission to extend the previous fragment if possible.

Modified:
    llvm/trunk/include/llvm/MC/MCAssembler.h
    llvm/trunk/lib/MC/MCAssembler.cpp
    llvm/trunk/lib/MC/MCMachOStreamer.cpp

Modified: llvm/trunk/include/llvm/MC/MCAssembler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=79739&r1=79738&r2=79739&view=diff

==============================================================================
--- llvm/trunk/include/llvm/MC/MCAssembler.h (original)
+++ llvm/trunk/include/llvm/MC/MCAssembler.h Sat Aug 22 05:13:24 2009
@@ -64,7 +64,7 @@
 
   // FIXME: This should be abstract, fix sentinel.
   virtual uint64_t getMaxFileSize() const {
-    assert(0 && "Invalid getMaxFileSize call !");
+    assert(0 && "Invalid getMaxFileSize call!");
     return 0;
   };
 
@@ -270,7 +270,6 @@
   unsigned getAlignment() const { return Alignment; }
   void setAlignment(unsigned Value) { Alignment = Value; }
 
-
   /// @name Section List Access
   /// @{
 
@@ -285,6 +284,8 @@
 
   size_t size() const { return Fragments.size(); }
 
+  bool empty() const { return Fragments.empty(); }
+
   /// @}
   /// @name Assembler Backend Support
   /// @{
@@ -300,13 +301,49 @@
   /// @}
 };
 
+// FIXME: Same concerns as with SectionData.
+class MCSymbolData : public ilist_node<MCSymbolData> {
+public:
+  MCSymbol &Symbol;
+
+  /// Fragment - The fragment this symbol's value is relative to, if any.
+  MCFragment *Fragment;
+
+  /// Offset - The offset to apply to the fragment address to form this symbol's
+  /// value.
+  uint64_t Offset;
+
+public:
+  // Only for use as sentinel.
+  MCSymbolData();
+  MCSymbolData(MCSymbol &_Symbol, MCFragment *_Fragment, uint64_t _Offset,
+               MCAssembler *A = 0);
+
+  /// @name Accessors
+  /// @{
+
+  MCSymbol &getSymbol() const { return Symbol; }
+
+  MCFragment *getFragment() const { return Fragment; }
+  void setFragment(MCFragment *Value) { Fragment = Value; }
+
+  uint64_t getOffset() const { return Offset; }
+  void setOffset(uint64_t Value) { Offset = Value; }
+
+  /// @}  
+};
+
 class MCAssembler {
 public:
   typedef iplist<MCSectionData> SectionDataListType;
+  typedef iplist<MCSymbolData> SymbolDataListType;
 
   typedef SectionDataListType::const_iterator const_iterator;
   typedef SectionDataListType::iterator iterator;
 
+  typedef SymbolDataListType::const_iterator const_symbol_iterator;
+  typedef SymbolDataListType::iterator symbol_iterator;
+
 private:
   MCAssembler(const MCAssembler&);    // DO NOT IMPLEMENT
   void operator=(const MCAssembler&); // DO NOT IMPLEMENT
@@ -315,6 +352,8 @@
   
   iplist<MCSectionData> Sections;
 
+  iplist<MCSymbolData> Symbols;
+
 private:
   /// LayoutSection - Assign offsets and sizes to the fragments in the section
   /// \arg SD, and update the section size. The section file offset should
@@ -351,6 +390,21 @@
   size_t size() const { return Sections.size(); }
 
   /// @}
+  /// @name Symbol List Access
+  /// @{
+
+  const SymbolDataListType &getSymbolList() const { return Symbols; }
+  SymbolDataListType &getSymbolList() { return Symbols; }
+
+  symbol_iterator symbol_begin() { return Symbols.begin(); }
+  const_symbol_iterator symbol_begin() const { return Symbols.begin(); }
+
+  symbol_iterator symbol_end() { return Symbols.end(); }
+  const_symbol_iterator symbol_end() const { return Symbols.end(); }
+
+  size_t symbol_size() const { return Symbols.size(); }
+
+  /// @}
 };
 
 } // end namespace llvm

Modified: llvm/trunk/lib/MC/MCAssembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=79739&r1=79738&r2=79739&view=diff

==============================================================================
--- llvm/trunk/lib/MC/MCAssembler.cpp (original)
+++ llvm/trunk/lib/MC/MCAssembler.cpp Sat Aug 22 05:13:24 2009
@@ -9,6 +9,8 @@
 
 #include "llvm/MC/MCAssembler.h"
 
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Support/DataTypes.h"
@@ -18,7 +20,10 @@
 
 using namespace llvm;
 
-namespace {
+class MachObjectWriter;
+
+static void WriteFileData(raw_ostream &OS, const MCSectionData &SD,
+                          MachObjectWriter &MOW);
 
 class MachObjectWriter {
   // See <mach-o/loader.h>.
@@ -31,13 +36,18 @@
   static const unsigned Header64Size = 32;
   static const unsigned SegmentLoadCommand32Size = 56;
   static const unsigned Section32Size = 68;
+  static const unsigned SymtabLoadCommandSize = 24;
+  static const unsigned DysymtabLoadCommandSize = 80;
+  static const unsigned Nlist32Size = 12;
 
   enum HeaderFileType {
     HFT_Object = 0x1
   };
 
   enum LoadCommandType {
-    LCT_Segment = 0x1
+    LCT_Segment = 0x1,
+    LCT_Symtab = 0x2,
+    LCT_Dysymtab = 0xb
   };
 
   raw_ostream &OS;
@@ -102,7 +112,7 @@
 
   /// @}
   
-  void WriteHeader32(unsigned NumSections) {
+  void WriteHeader32(unsigned NumLoadCommands, unsigned LoadCommandsSize) {
     // struct mach_header (28 bytes)
 
     uint64_t Start = OS.tell();
@@ -119,25 +129,19 @@
     Write32(HFT_Object);
 
     // Object files have a single load command, the segment.
-    Write32(1);
-    Write32(SegmentLoadCommand32Size + NumSections * Section32Size);
+    Write32(NumLoadCommands);
+    Write32(LoadCommandsSize);
     Write32(0); // Flags
 
     assert(OS.tell() - Start == Header32Size);
   }
 
-  void WriteLoadCommandHeader(uint32_t Cmd, uint32_t CmdSize) {
-    assert((CmdSize & 0x3) == 0 && "Invalid size!");
-
-    Write32(Cmd);
-    Write32(CmdSize);
-  }
-
   /// WriteSegmentLoadCommand32 - Write a 32-bit segment load command.
   ///
   /// \arg NumSections - The number of sections in this segment.
   /// \arg SectionDataSize - The total size of the sections.
   void WriteSegmentLoadCommand32(unsigned NumSections,
+                                 uint64_t SectionDataStartOffset,
                                  uint64_t SectionDataSize) {
     // struct segment_command (56 bytes)
 
@@ -150,8 +154,7 @@
     WriteString("", 16);
     Write32(0); // vmaddr
     Write32(SectionDataSize); // vmsize
-    Write32(Header32Size + SegmentLoadCommand32Size + 
-            NumSections * Section32Size); // file offset
+    Write32(SectionDataStartOffset); // file offset
     Write32(SectionDataSize); // file size
     Write32(0x7); // maxprot
     Write32(0x7); // initprot
@@ -187,18 +190,137 @@
     assert(OS.tell() - Start == Section32Size);
   }
 
-  void WriteProlog(MCAssembler &Asm) {
+  void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
+                              uint32_t StringTableOffset,
+                              uint32_t StringTableSize) {
+    // struct symtab_command (24 bytes)
+
+    uint64_t Start = OS.tell();
+    (void) Start;
+
+    Write32(LCT_Symtab);
+    Write32(SymtabLoadCommandSize);
+    Write32(SymbolOffset);
+    Write32(NumSymbols);
+    Write32(StringTableOffset);
+    Write32(StringTableSize);
+
+    assert(OS.tell() - Start == SymtabLoadCommandSize);
+  }
+
+  void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
+                                uint32_t NumLocalSymbols,
+                                uint32_t FirstExternalSymbol,
+                                uint32_t NumExternalSymbols,
+                                uint32_t FirstUndefinedSymbol,
+                                uint32_t NumUndefinedSymbols,
+                                uint32_t IndirectSymbolOffset,
+                                uint32_t NumIndirectSymbols) {
+    // struct dysymtab_command (80 bytes)
+
+    uint64_t Start = OS.tell();
+    (void) Start;
+
+    Write32(LCT_Dysymtab);
+    Write32(DysymtabLoadCommandSize);
+    Write32(FirstLocalSymbol);
+    Write32(NumLocalSymbols);
+    Write32(FirstExternalSymbol);
+    Write32(NumExternalSymbols);
+    Write32(FirstUndefinedSymbol);
+    Write32(NumUndefinedSymbols);
+    Write32(0); // tocoff
+    Write32(0); // ntoc
+    Write32(0); // modtaboff
+    Write32(0); // nmodtab
+    Write32(0); // extrefsymoff
+    Write32(0); // nextrefsyms
+    Write32(IndirectSymbolOffset);
+    Write32(NumIndirectSymbols);
+    Write32(0); // extreloff
+    Write32(0); // nextrel
+    Write32(0); // locreloff
+    Write32(0); // nlocrel
+
+    assert(OS.tell() - Start == DysymtabLoadCommandSize);
+  }
+
+  void WriteNlist32(uint32_t StringIndex, uint8_t Type, uint8_t Sect,
+                    int16_t Desc, uint32_t Value) {
+    // struct nlist (12 bytes)
+
+    Write32(StringIndex);
+    Write8(Type);
+    Write8(Sect);
+    Write16(Desc);
+    Write32(Value);
+  }
+
+  /// ComputeStringTable - Compute the string table, for use in the symbol
+  /// table.
+  ///
+  /// \param StringTable [out] - The string table data.
+  /// \param StringIndexMap [out] - Map from symbol names to offsets in the
+  /// string table.
+  void ComputeStringTable(MCAssembler &Asm, SmallString<256> &StringTable,
+                          StringMap<uint64_t> &StringIndexMap) {
+    // Build the string table.
+    //
+    // FIXME: Does 'as' ever bother to compress this when we have a suffix
+    // match?
+
+    // Index 0 is always the empty string.
+    StringTable += '\x00';
+    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+           ie = Asm.symbol_end(); it != ie; ++it) {
+      StringRef Name = it->getSymbol().getName();
+      uint64_t &Entry = StringIndexMap[Name];
+
+      if (!Entry) {
+        Entry = StringTable.size();
+        StringTable += Name;
+        StringTable += '\x00';
+      }
+    }
+
+    // The string table is padded to a multiple of 4.
+    //
+    // FIXME: Check to see if this varies per arch.
+    while (StringTable.size() % 4)
+      StringTable += '\x00';
+  }
+
+  void WriteObject(MCAssembler &Asm) {
     unsigned NumSections = Asm.size();
 
+    // Compute symbol table information.
+    SmallString<256> StringTable;
+    StringMap<uint64_t> StringIndexMap;
+    unsigned NumSymbols = Asm.symbol_size();
+
+    // No symbol table command is written if there are no symbols.
+    if (NumSymbols)
+      ComputeStringTable(Asm, StringTable, StringIndexMap);
+
     // Compute the file offsets for all the sections in advance, so that we can
     // write things out in order.
     SmallVector<uint64_t, 16> SectionFileOffsets;
     SectionFileOffsets.resize(NumSections);
   
-    // The section data starts after the header, the segment load command, and
-    // the section headers.
-    uint64_t FileOffset = Header32Size + SegmentLoadCommand32Size + 
-      NumSections * Section32Size;
+    // The section data starts after the header, the segment load command (and
+    // section headers) and the symbol table load commands.
+    unsigned NumLoadCommands = 1;
+    uint64_t LoadCommandsSize =
+      SegmentLoadCommand32Size + NumSections * Section32Size;
+
+    // Add the symbol table load command sizes, if used.
+    if (NumSymbols) {
+      NumLoadCommands += 2;
+      LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize;
+    }
+
+    uint64_t FileOffset = Header32Size + LoadCommandsSize;
+    uint64_t SectionDataStartOffset = FileOffset;
     uint64_t SectionDataSize = 0;
     unsigned Index = 0;
     for (MCAssembler::iterator it = Asm.begin(),
@@ -209,19 +331,66 @@
     }
 
     // Write the prolog, starting with the header and load command...
-    WriteHeader32(NumSections);
-    WriteSegmentLoadCommand32(NumSections, SectionDataSize);
+    WriteHeader32(NumLoadCommands, LoadCommandsSize);
+    WriteSegmentLoadCommand32(NumSections, SectionDataStartOffset,
+                              SectionDataSize);
   
     // ... and then the section headers.
     Index = 0;
     for (MCAssembler::iterator it = Asm.begin(),
            ie = Asm.end(); it != ie; ++it, ++Index)
       WriteSection32(*it, SectionFileOffsets[Index]);
-  }
 
-};
+    // Write the symbol table load command, if used.
+    if (NumSymbols) {
+      // The symbol table follows the section data, then the string table.
+      uint64_t SymbolTableOffset = SectionDataStartOffset + SectionDataSize;
+      uint64_t StringTableOffset =
+        SymbolTableOffset + NumSymbols * Nlist32Size;
+      WriteSymtabLoadCommand(SymbolTableOffset, NumSymbols,
+                             StringTableOffset, StringTable.size());
+
+      // FIXME: Get correct symbol indices and counts.
+      unsigned FirstLocalSymbol = 0;
+      unsigned NumLocalSymbols = NumSymbols;
+      unsigned FirstExternalSymbol = NumLocalSymbols;
+      unsigned NumExternalSymbols = 0;
+      unsigned FirstUndefinedSymbol = NumLocalSymbols;
+      unsigned NumUndefinedSymbols = 0;
+      unsigned IndirectSymbolOffset = 0;
+      unsigned NumIndirectSymbols = 0;
+      WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
+                               FirstExternalSymbol, NumExternalSymbols,
+                               FirstUndefinedSymbol, NumUndefinedSymbols,
+                               IndirectSymbolOffset, NumIndirectSymbols);
+    }
 
-}
+    // Write the actual section data.
+    for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
+      WriteFileData(OS, *it, *this);
+
+    // Write the symbol table data, if used.
+    if (NumSymbols) {
+      // FIXME: Check that offsets match computed ones.
+
+      // FIXME: These need to be reordered, both to segregate into categories
+      // as well as to order some sublists.
+
+      // Write the symbol table entries.
+      for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+             ie = Asm.symbol_end(); it != ie; ++it) {
+        MCSymbol &Sym = it->getSymbol();
+        uint64_t Index = StringIndexMap[Sym.getName()];
+        assert(Index && "Invalid index!");
+        WriteNlist32(Index, /*FIXME: Type=*/0, /*FIXME: Sect=*/0,
+                     /*FIXME: Desc=*/0, /*FIXME: Value=*/0);
+      }
+
+      // Write the string table.
+      OS << StringTable.str();
+    }
+  }
+};
 
 /* *** */
 
@@ -254,6 +423,18 @@
 
 /* *** */
 
+MCSymbolData::MCSymbolData() : Symbol(*(MCSymbol*)0) {}
+
+MCSymbolData::MCSymbolData(MCSymbol &_Symbol, MCFragment *_Fragment,
+                           uint64_t _Offset, MCAssembler *A)
+  : Symbol(_Symbol), Fragment(_Fragment), Offset(_Offset) 
+{
+  if (A)
+    A->getSymbolList().push_back(this);
+}
+
+/* *** */
+
 MCAssembler::MCAssembler(raw_ostream &_OS) : OS(_OS) {}
 
 MCAssembler::~MCAssembler() {
@@ -400,15 +581,9 @@
   for (iterator it = begin(), ie = end(); it != ie; ++it)
     LayoutSection(*it);
 
+  // Write the object file.
   MachObjectWriter MOW(OS);
-
-  // Write the prolog, followed by the data for all the sections & fragments.
-  MOW.WriteProlog(*this);
-
-  // FIXME: This should move into the Mach-O writer, it should have control over
-  // what goes where.
-  for (iterator it = begin(), ie = end(); it != ie; ++it)
-    WriteFileData(OS, *it, MOW);
+  MOW.WriteObject(*this);
 
   OS.flush();
 }

Modified: llvm/trunk/lib/MC/MCMachOStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCMachOStreamer.cpp?rev=79739&r1=79738&r2=79739&view=diff

==============================================================================
--- llvm/trunk/lib/MC/MCMachOStreamer.cpp (original)
+++ llvm/trunk/lib/MC/MCMachOStreamer.cpp Sat Aug 22 05:13:24 2009
@@ -26,6 +26,27 @@
   MCSectionData *CurSectionData;
 
   DenseMap<const MCSection*, MCSectionData*> SectionMap;
+  
+  DenseMap<const MCSymbol*, MCSymbolData*> SymbolMap;
+
+private:
+  MCFragment *getCurrentFragment() const {
+    assert(CurSectionData && "No current section!");
+
+    if (!CurSectionData->empty())
+      return &CurSectionData->getFragmentList().back();
+
+    return 0;
+  }
+
+  MCSymbolData &getSymbolData(MCSymbol &Symbol) {
+    MCSymbolData *&Entry = SymbolMap[&Symbol];
+
+    if (!Entry)
+      Entry = new MCSymbolData(Symbol, 0, 0, &Assembler);
+
+    return *Entry;
+  }
 
 public:
   MCMachOStreamer(MCContext &Context, raw_ostream &_OS)
@@ -92,10 +113,16 @@
 
 void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
-  assert(CurSection && "Cannot emit before setting section!");
-
-  llvm_unreachable("FIXME: Not yet implemented!");
 
+  MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+  if (!F)
+    F = new MCDataFragment(CurSectionData);
+
+  MCSymbolData &SD = getSymbolData(*Symbol);
+  assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+  SD.setFragment(F);
+  SD.setOffset(F->getContents().size());
+  
   Symbol->setSection(*CurSection);
 }
 
@@ -138,7 +165,9 @@
 }
 
 void MCMachOStreamer::EmitBytes(const StringRef &Data) {
-  MCDataFragment *DF = new MCDataFragment(CurSectionData);
+  MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+  if (!DF)
+    DF = new MCDataFragment(CurSectionData);
   DF->getContents().append(Data.begin(), Data.end());
 }
 
@@ -154,7 +183,7 @@
   new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
                       CurSectionData);
 
-  // Update the maximum alignment on the current section if necessary
+  // Update the maximum alignment on the current section if necessary.
   if (ByteAlignment > CurSectionData->getAlignment())
     CurSectionData->setAlignment(ByteAlignment);
 }





More information about the llvm-commits mailing list