[llvm-commits] [llvm] r79740 - in /llvm/trunk: include/llvm/MC/MCAssembler.h include/llvm/MC/MCSymbol.h lib/MC/MCAssembler.cpp lib/MC/MCMachOStreamer.cpp test/MC/MachO/symbols-1.s

Daniel Dunbar daniel at zuster.org
Sat Aug 22 04:41:11 PDT 2009


Author: ddunbar
Date: Sat Aug 22 06:41:10 2009
New Revision: 79740

URL: http://llvm.org/viewvc/llvm-project?rev=79740&view=rev
Log:
llvm-mc/Mach-O: Improve symbol table support:
 - Honor .globl.

 - Set symbol type and section correctly ('nm' now works), and order symbols
   appropriately.

 - Take care with the string table so that the .o matches 'as' exactly (for ease
   of testing).

Added:
    llvm/trunk/test/MC/MachO/symbols-1.s
Modified:
    llvm/trunk/include/llvm/MC/MCAssembler.h
    llvm/trunk/include/llvm/MC/MCSymbol.h
    llvm/trunk/lib/MC/MCAssembler.cpp
    llvm/trunk/lib/MC/MCMachOStreamer.cpp

Modified: llvm/trunk/include/llvm/MC/MCAssembler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=79740&r1=79739&r2=79740&view=diff

==============================================================================
--- llvm/trunk/include/llvm/MC/MCAssembler.h (original)
+++ llvm/trunk/include/llvm/MC/MCAssembler.h Sat Aug 22 06:41:10 2009
@@ -312,6 +312,10 @@
   /// Offset - The offset to apply to the fragment address to form this symbol's
   /// value.
   uint64_t Offset;
+    
+  /// IsExternal - True if this symbol is visible outside this translation
+  /// unit.
+  unsigned IsExternal : 1;
 
 public:
   // Only for use as sentinel.
@@ -330,6 +334,13 @@
   uint64_t getOffset() const { return Offset; }
   void setOffset(uint64_t Value) { Offset = Value; }
 
+    /// @}
+    /// @name Symbol Attributes
+    /// @{
+
+    bool isExternal() const { return IsExternal; }
+    void setExternal(bool Value) { IsExternal = Value; }
+
   /// @}  
 };
 

Modified: llvm/trunk/include/llvm/MC/MCSymbol.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCSymbol.h?rev=79740&r1=79739&r2=79740&view=diff

==============================================================================
--- llvm/trunk/include/llvm/MC/MCSymbol.h (original)
+++ llvm/trunk/include/llvm/MC/MCSymbol.h Sat Aug 22 06:41:10 2009
@@ -43,7 +43,7 @@
     /// undefined symbols, and the special AbsolutePseudoSection value for
     /// absolute symbols.
     const MCSection *Section;
-    
+
     /// IsTemporary - True if this is an assembler temporary label, which
     /// typically does not survive in the .o file's symbol table.  Usually
     /// "Lfoo" or ".foo".
@@ -52,8 +52,7 @@
   private:  // MCContext creates and uniques these.
     friend class MCContext;
     MCSymbol(const StringRef &_Name, bool _IsTemporary) 
-      : Name(_Name), Section(0),
-        IsTemporary(_IsTemporary) {}
+      : Name(_Name), Section(0), IsTemporary(_IsTemporary) {}
     
     MCSymbol(const MCSymbol&);       // DO NOT IMPLEMENT
     void operator=(const MCSymbol&); // DO NOT IMPLEMENT
@@ -61,7 +60,7 @@
     /// getName - Get the symbol name.
     const std::string &getName() const { return Name; }
 
-    /// @name Symbol Location Functions
+    /// @name Symbol Type
     /// @{
 
     /// isUndefined - Check if this symbol undefined (i.e., implicitly defined).

Modified: llvm/trunk/lib/MC/MCAssembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=79740&r1=79739&r2=79740&view=diff

==============================================================================
--- llvm/trunk/lib/MC/MCAssembler.cpp (original)
+++ llvm/trunk/lib/MC/MCAssembler.cpp Sat Aug 22 06:41:10 2009
@@ -9,6 +9,7 @@
 
 #include "llvm/MC/MCAssembler.h"
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Twine.h"
@@ -50,6 +51,37 @@
     LCT_Dysymtab = 0xb
   };
 
+  // See <mach-o/nlist.h>.
+  enum SymbolTypeType {
+    STT_Undefined = 0x00,
+    STT_Absolute  = 0x02,
+    STT_Section   = 0x0e
+  };
+
+  enum SymbolTypeFlags {
+    // If any of these bits are set, then the entry is a stab entry number (see
+    // <mach-o/stab.h>). Otherwise the other masks apply.
+    STF_StabsEntryMask = 0xe0,
+
+    STF_TypeMask       = 0x0e,
+    STF_External       = 0x01,
+    STF_PrivateExtern  = 0x10
+  };
+
+  /// MachSymbolData - Helper struct for containing some precomputed information
+  /// on symbols.
+  struct MachSymbolData {
+    MCSymbolData *SymbolData;
+    uint64_t StringIndex;
+    uint8_t SectionIndex;
+
+    // Support lexicographic sorting.
+    bool operator<(const MachSymbolData &RHS) const {
+      const std::string &Name = SymbolData->getSymbol().getName();
+      return Name < RHS.SymbolData->getSymbol().getName();
+    }
+  };
+
   raw_ostream &OS;
   bool IsLSB;
 
@@ -245,44 +277,131 @@
     assert(OS.tell() - Start == DysymtabLoadCommandSize);
   }
 
-  void WriteNlist32(uint32_t StringIndex, uint8_t Type, uint8_t Sect,
-                    int16_t Desc, uint32_t Value) {
+  void WriteNlist32(MachSymbolData &MSD) {
+    MCSymbol &Symbol = MSD.SymbolData->getSymbol();
+    uint8_t Type = 0;
+
+    // Set the N_TYPE bits. See <mach-o/nlist.h>.
+    //
+    // FIXME: Are the prebound or indirect fields possible here?
+    if (Symbol.isUndefined())
+      Type = STT_Undefined;
+    else if (Symbol.isAbsolute())
+      Type = STT_Absolute;
+    else
+      Type = STT_Section;
+
+    // FIXME: Set STAB bits.
+
+    // FIXME: Set private external bit.
+
+    // Set external bit.
+    if (MSD.SymbolData->isExternal())
+      Type |= STF_External;
+
     // struct nlist (12 bytes)
 
-    Write32(StringIndex);
+    Write32(MSD.StringIndex);
     Write8(Type);
-    Write8(Sect);
-    Write16(Desc);
-    Write32(Value);
+    Write8(MSD.SectionIndex);
+    Write16(0); // FIXME: Desc
+    Write32(0); // FIXME: Value
   }
 
-  /// ComputeStringTable - Compute the string table, for use in the symbol
-  /// table.
+  /// ComputeSymbolTable - Compute the symbol table data
   ///
   /// \param StringTable [out] - The string table data.
   /// \param StringIndexMap [out] - Map from symbol names to offsets in the
   /// string table.
-  void ComputeStringTable(MCAssembler &Asm, SmallString<256> &StringTable,
-                          StringMap<uint64_t> &StringIndexMap) {
-    // Build the string table.
-    //
-    // FIXME: Does 'as' ever bother to compress this when we have a suffix
-    // match?
+
+  void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
+                          std::vector<MachSymbolData> &LocalSymbolData,
+                          std::vector<MachSymbolData> &ExternalSymbolData,
+                          std::vector<MachSymbolData> &UndefinedSymbolData) {
+    // Build section lookup table.
+    DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+    unsigned Index = 1;
+    for (MCAssembler::iterator it = Asm.begin(),
+           ie = Asm.end(); it != ie; ++it, ++Index)
+      SectionIndexMap[&it->getSection()] = Index;
+    assert(Index <= 256 && "Too many sections!");
 
     // Index 0 is always the empty string.
+    StringMap<uint64_t> StringIndexMap;
     StringTable += '\x00';
+
+    // Build the symbol arrays and the string table, but only for non-local
+    // symbols.
+    //
+    // The particular order that we collect the symbols and create the string
+    // table, then sort the symbols is chosen to match 'as'. Even though it
+    // doesn't matter for correctness, this is important for letting us diff .o
+    // files.
     for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
            ie = Asm.symbol_end(); it != ie; ++it) {
-      StringRef Name = it->getSymbol().getName();
-      uint64_t &Entry = StringIndexMap[Name];
+      MCSymbol &Symbol = it->getSymbol();
+
+      if (!it->isExternal())
+        continue;
 
+      uint64_t &Entry = StringIndexMap[Symbol.getName()];
       if (!Entry) {
         Entry = StringTable.size();
-        StringTable += Name;
+        StringTable += Symbol.getName();
         StringTable += '\x00';
       }
+
+      MachSymbolData MSD;
+      MSD.SymbolData = it;
+      MSD.StringIndex = Entry;
+
+      if (Symbol.isUndefined()) {
+        MSD.SectionIndex = 0;
+        UndefinedSymbolData.push_back(MSD);
+      } else if (Symbol.isAbsolute()) {
+        MSD.SectionIndex = 0;
+        ExternalSymbolData.push_back(MSD);
+      } else {
+        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+        assert(MSD.SectionIndex && "Invalid section index!");
+        ExternalSymbolData.push_back(MSD);
+      }
     }
 
+    // Now add the data for local symbols.
+    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+           ie = Asm.symbol_end(); it != ie; ++it) {
+      MCSymbol &Symbol = it->getSymbol();
+
+      if (it->isExternal())
+        continue;
+
+      uint64_t &Entry = StringIndexMap[Symbol.getName()];
+      if (!Entry) {
+        Entry = StringTable.size();
+        StringTable += Symbol.getName();
+        StringTable += '\x00';
+      }
+
+      MachSymbolData MSD;
+      MSD.SymbolData = it;
+      MSD.StringIndex = Entry;
+
+      assert(!Symbol.isUndefined() && "Local symbol can not be undefined!");
+      if (Symbol.isAbsolute()) {
+        MSD.SectionIndex = 0;
+        LocalSymbolData.push_back(MSD);
+      } else {
+        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+        assert(MSD.SectionIndex && "Invalid section index!");
+        LocalSymbolData.push_back(MSD);
+      }
+    }
+
+    // External and undefined symbols are required to be in lexicographic order.
+    std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+    std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
     // The string table is padded to a multiple of 4.
     //
     // FIXME: Check to see if this varies per arch.
@@ -295,12 +414,15 @@
 
     // Compute symbol table information.
     SmallString<256> StringTable;
-    StringMap<uint64_t> StringIndexMap;
+    std::vector<MachSymbolData> LocalSymbolData;
+    std::vector<MachSymbolData> ExternalSymbolData;
+    std::vector<MachSymbolData> UndefinedSymbolData;
     unsigned NumSymbols = Asm.symbol_size();
 
     // No symbol table command is written if there are no symbols.
     if (NumSymbols)
-      ComputeStringTable(Asm, StringTable, StringIndexMap);
+      ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
+                         UndefinedSymbolData);
 
     // Compute the file offsets for all the sections in advance, so that we can
     // write things out in order.
@@ -350,13 +472,13 @@
       WriteSymtabLoadCommand(SymbolTableOffset, NumSymbols,
                              StringTableOffset, StringTable.size());
 
-      // FIXME: Get correct symbol indices and counts.
       unsigned FirstLocalSymbol = 0;
-      unsigned NumLocalSymbols = NumSymbols;
-      unsigned FirstExternalSymbol = NumLocalSymbols;
-      unsigned NumExternalSymbols = 0;
-      unsigned FirstUndefinedSymbol = NumLocalSymbols;
-      unsigned NumUndefinedSymbols = 0;
+      unsigned NumLocalSymbols = LocalSymbolData.size();
+      unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
+      unsigned NumExternalSymbols = ExternalSymbolData.size();
+      unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
+      unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
+      // FIXME: Get correct symbol indices and counts for indirect symbols.
       unsigned IndirectSymbolOffset = 0;
       unsigned NumIndirectSymbols = 0;
       WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
@@ -373,18 +495,15 @@
     if (NumSymbols) {
       // FIXME: Check that offsets match computed ones.
 
-      // FIXME: These need to be reordered, both to segregate into categories
-      // as well as to order some sublists.
+      // FIXME: Some of these are ordered by name to help the linker.
 
       // Write the symbol table entries.
-      for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
-             ie = Asm.symbol_end(); it != ie; ++it) {
-        MCSymbol &Sym = it->getSymbol();
-        uint64_t Index = StringIndexMap[Sym.getName()];
-        assert(Index && "Invalid index!");
-        WriteNlist32(Index, /*FIXME: Type=*/0, /*FIXME: Sect=*/0,
-                     /*FIXME: Desc=*/0, /*FIXME: Value=*/0);
-      }
+      for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+        WriteNlist32(LocalSymbolData[i]);
+      for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+        WriteNlist32(ExternalSymbolData[i]);
+      for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+        WriteNlist32(UndefinedSymbolData[i]);
 
       // Write the string table.
       OS << StringTable.str();
@@ -427,7 +546,8 @@
 
 MCSymbolData::MCSymbolData(MCSymbol &_Symbol, MCFragment *_Fragment,
                            uint64_t _Offset, MCAssembler *A)
-  : Symbol(_Symbol), Fragment(_Fragment), Offset(_Offset) 
+  : Symbol(_Symbol), Fragment(_Fragment), Offset(_Offset),
+    IsExternal(false)
 {
   if (A)
     A->getSymbolList().push_back(this);

Modified: llvm/trunk/lib/MC/MCMachOStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCMachOStreamer.cpp?rev=79740&r1=79739&r2=79740&view=diff

==============================================================================
--- llvm/trunk/lib/MC/MCMachOStreamer.cpp (original)
+++ llvm/trunk/lib/MC/MCMachOStreamer.cpp Sat Aug 22 06:41:10 2009
@@ -142,7 +142,14 @@
 
 void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
                                           SymbolAttr Attribute) {
-  llvm_unreachable("FIXME: Not yet implemented!");
+  switch (Attribute) {
+  default:
+    llvm_unreachable("FIXME: Not yet implemented!");
+
+  case MCStreamer::Global:
+    getSymbolData(*Symbol).setExternal(true);
+    break;
+  }
 }
 
 void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {

Added: llvm/trunk/test/MC/MachO/symbols-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/MachO/symbols-1.s?rev=79740&view=auto

==============================================================================
--- llvm/trunk/test/MC/MachO/symbols-1.s (added)
+++ llvm/trunk/test/MC/MachO/symbols-1.s Sat Aug 22 06:41:10 2009
@@ -0,0 +1,160 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+
+sym_local_B:
+.globl sym_globl_def_B
+.globl sym_globl_undef_B
+sym_local_A:
+.globl sym_globl_def_A
+.globl sym_globl_undef_A
+sym_local_C:
+.globl sym_globl_def_C
+.globl sym_globl_undef_C
+        
+sym_globl_def_A: 
+sym_globl_def_B: 
+sym_globl_def_C: 
+        .long 0
+
+// CHECK: ('cputype', 7)
+// CHECK: ('cpusubtype', 3)
+// CHECK: ('filetype', 1)
+// CHECK: ('num_load_commands', 1)
+// CHECK: ('load_commands_size', 228)
+// CHECK: ('flag', 0)
+// CHECK: ('load_commands', [
+// CHECK:   # Load Command 0
+// CHECK:  (('command', 1)
+// CHECK:   ('size', 124)
+// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:   ('vm_addr', 0)
+// CHECK:   ('vm_size', 4)
+// CHECK:   ('file_offset', 256)
+// CHECK:   ('file_size', 4)
+// CHECK:   ('maxprot', 7)
+// CHECK:   ('initprot', 7)
+// CHECK:   ('num_sections', 1)
+// CHECK:   ('flags', 0)
+// CHECK:   ('sections', [
+// CHECK:     # Section 0
+// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK:     ('address', 0)
+// CHECK:     ('size', 4)
+// CHECK:     ('offset', 256)
+// CHECK:     ('alignment', 0)
+// CHECK:     ('reloc_offset', 0)
+// CHECK:     ('num_reloc', 0)
+// CHECK:     ('flags', 0x80000000)
+// CHECK:     ('reserved1', 0)
+// CHECK:     ('reserved2', 0)
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 1
+// CHECK:  (('command', 2)
+// CHECK:   ('size', 24)
+// CHECK:   ('symoff', 260)
+// CHECK:   ('nsyms', 9)
+// CHECK:   ('stroff', 368)
+// CHECK:   ('strsize', 140)
+// CHECK:   ('_string_data', '\x00sym_globl_def_B\x00sym_globl_undef_B\x00sym_globl_def_A\x00sym_globl_undef_A\x00sym_globl_def_C\x00sym_globl_undef_C\x00sym_local_B\x00sym_local_A\x00sym_local_C\x00\x00')
+// CHECK:   ('_symbols', [
+// CHECK:     # Symbol 0
+// CHECK:    (('n_strx', 103)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_local_B')
+// CHECK:    ),
+// CHECK:     # Symbol 1
+// CHECK:    (('n_strx', 115)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_local_A')
+// CHECK:    ),
+// CHECK:     # Symbol 2
+// CHECK:    (('n_strx', 127)
+// CHECK:     ('n_type', 0xe)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_local_C')
+// CHECK:    ),
+// CHECK:     # Symbol 3
+// CHECK:    (('n_strx', 35)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_globl_def_A')
+// CHECK:    ),
+// CHECK:     # Symbol 4
+// CHECK:    (('n_strx', 1)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_globl_def_B')
+// CHECK:    ),
+// CHECK:     # Symbol 5
+// CHECK:    (('n_strx', 69)
+// CHECK:     ('n_type', 0xf)
+// CHECK:     ('n_sect', 1)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_globl_def_C')
+// CHECK:    ),
+// CHECK:     # Symbol 6
+// CHECK:    (('n_strx', 51)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_globl_undef_A')
+// CHECK:    ),
+// CHECK:     # Symbol 7
+// CHECK:    (('n_strx', 17)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_globl_undef_B')
+// CHECK:    ),
+// CHECK:     # Symbol 8
+// CHECK:    (('n_strx', 85)
+// CHECK:     ('n_type', 0x1)
+// CHECK:     ('n_sect', 0)
+// CHECK:     ('n_desc', 0)
+// CHECK:     ('n_value', 0)
+// CHECK:     ('_string', 'sym_globl_undef_C')
+// CHECK:    ),
+// CHECK:   ])
+// CHECK:  ),
+// CHECK:   # Load Command 2
+// CHECK:  (('command', 11)
+// CHECK:   ('size', 80)
+// CHECK:   ('ilocalsym', 0)
+// CHECK:   ('nlocalsym', 3)
+// CHECK:   ('iextdefsym', 3)
+// CHECK:   ('nextdefsym', 3)
+// CHECK:   ('iundefsym', 6)
+// CHECK:   ('nundefsym', 3)
+// CHECK:   ('tocoff', 0)
+// CHECK:   ('ntoc', 0)
+// CHECK:   ('modtaboff', 0)
+// CHECK:   ('nmodtab', 0)
+// CHECK:   ('extrefsymoff', 0)
+// CHECK:   ('nextrefsyms', 0)
+// CHECK:   ('indirectsymoff', 0)
+// CHECK:   ('nindirectsyms', 0)
+// CHECK:   ('extreloff', 0)
+// CHECK:   ('nextrel', 0)
+// CHECK:   ('locreloff', 0)
+// CHECK:   ('nlocrel', 0)
+// CHECK:   ('_indirect_symbols', [
+// CHECK:   ])
+// CHECK:  ),
+// CHECK: ])





More information about the llvm-commits mailing list