[llvm-commits] [lld] r147799 - in /lld/trunk: include/lld/Core/SymbolTable.h lib/Core/SymbolTable.cpp lib/Core/YamlReader.cpp lib/Core/YamlWriter.cpp test/cstring-coalesce.objtxt

Nick Kledzik kledzik at apple.com
Mon Jan 9 12:18:15 PST 2012


Author: kledzik
Date: Mon Jan  9 14:18:15 2012
New Revision: 147799

URL: http://llvm.org/viewvc/llvm-project?rev=147799&view=rev
Log:
add initial support for coalescing by content (c-strings) with test case

Added:
    lld/trunk/test/cstring-coalesce.objtxt
Modified:
    lld/trunk/include/lld/Core/SymbolTable.h
    lld/trunk/lib/Core/SymbolTable.cpp
    lld/trunk/lib/Core/YamlReader.cpp
    lld/trunk/lib/Core/YamlWriter.cpp

Modified: lld/trunk/include/lld/Core/SymbolTable.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/include/lld/Core/SymbolTable.h?rev=147799&r1=147798&r2=147799&view=diff
==============================================================================
--- lld/trunk/include/lld/Core/SymbolTable.h (original)
+++ lld/trunk/include/lld/Core/SymbolTable.h Mon Jan  9 14:18:15 2012
@@ -14,7 +14,9 @@
 #include <map>
 #include <vector>
 
-namespace llvm { class StringRef; }
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/DenseSet.h"
+
 
 namespace lld {
 
@@ -52,12 +54,21 @@
 private:
   typedef std::map<llvm::StringRef, const Atom *> NameToAtom;
   typedef std::map<const Atom *, const Atom *> AtomToAtom;
+  struct MyMappingInfo { // key traits passed to llvm::DenseSet just below
+    static const Atom * getEmptyKey() { return NULL; } // sentinel, not an atom
+    static const Atom * getTombstoneKey() { return (Atom*)(-1); } // sentinel
+    static unsigned getHashValue(const Atom * const Val); // in SymbolTable.cpp
+    static bool isEqual(const Atom * const LHS, const Atom * const RHS);
+  }; // isEqual is also defined in SymbolTable.cpp
+  typedef llvm::DenseSet<const Atom*, MyMappingInfo> AtomContentSet;
 
   void addByName(const Atom &);
+  void addByContent(const Atom &);
 
   Platform&  _platform;
   AtomToAtom _replacedAtoms;
   NameToAtom _nameTable;
+  AtomContentSet _contentTable;
 };
 
 } // namespace lld

Modified: lld/trunk/lib/Core/SymbolTable.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/Core/SymbolTable.cpp?rev=147799&r1=147798&r2=147799&view=diff
==============================================================================
--- lld/trunk/lib/Core/SymbolTable.cpp (original)
+++ lld/trunk/lib/Core/SymbolTable.cpp Mon Jan  9 14:18:15 2012
@@ -16,6 +16,8 @@
 #include "lld/Platform/Platform.h"
 
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/ArrayRef.h"
 
 #include <algorithm>
 #include <cassert>
@@ -34,7 +36,7 @@
     this->addByName(atom);
   }
   else if ( atom.mergeDuplicates() ) {
-    // TO DO: support constants merging
+    this->addByContent(atom);
   }
 }
 
@@ -130,6 +132,58 @@
   }
 }
 
+// Hash an atom by content: size and bytes in the low 24 bits, type above.
+unsigned SymbolTable::MyMappingInfo::getHashValue(const Atom * const atom) {
+  unsigned hash = atom->size();  // seed with the atom's size
+  if ( atom->contentType() != Atom::typeZeroFill ) {  // zero-fill: size only
+    llvm::ArrayRef<uint8_t> content = atom->rawContent();
+    for (unsigned int i=0; i < content.size(); ++i) {
+      hash = hash * 33 + content[i];  // fold each byte in, multiplier 33
+    }
+  }
+  hash &= 0x00FFFFFF;  // clear the top byte to make room for the type
+  hash |= ((unsigned)atom->contentType()) << 24;  // content type on top
+  //fprintf(stderr, "atom=%p, hash=0x%08X\n", atom, hash);
+  return hash;
+}
+
+// Content equality for AtomContentSet: same type, same size, same raw bytes.
+bool SymbolTable::MyMappingInfo::isEqual(const Atom * const l, 
+                                         const Atom * const r) {
+  if ( l == r )  // identical pointers (this also covers sentinel == sentinel)
+    return true;
+  if ( l == getEmptyKey() )
+    return false;
+  if ( r == getEmptyKey() )
+    return false;
+  if ( l == getTombstoneKey() )
+    return false;
+  if ( r == getTombstoneKey() )
+    return false;
+  // Neither pointer is a sentinel past this point, so both get dereferenced.
+  if ( l->contentType() != r->contentType() )
+    return false;
+  if ( l->size() != r->size() )
+    return false;
+  llvm::ArrayRef<uint8_t> lc = l->rawContent();
+  llvm::ArrayRef<uint8_t> rc = r->rawContent();
+  return lc.equals(rc);  // byte-for-byte comparison decides the result
+}
+
+// Record newAtom by content; a duplicate is mapped to the atom already kept.
+void SymbolTable::addByContent(const Atom & newAtom) {
+  AtomContentSet::iterator pos = _contentTable.find(&newAtom);
+  if ( pos == _contentTable.end() ) {
+    _contentTable.insert(&newAtom);  // first atom seen with this content
+    return;
+  }
+  const Atom* existing = *pos;
+  // Equal content is already in the table: newAtom is not used, so record
+    _replacedAtoms[&newAtom] = existing;
+}
+
+
+
 const Atom *SymbolTable::findByName(llvm::StringRef sym) {
   NameToAtom::iterator pos = _nameTable.find(sym);
   if (pos == _nameTable.end())

Modified: lld/trunk/lib/Core/YamlReader.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/Core/YamlReader.cpp?rev=147799&r1=147798&r2=147799&view=diff
==============================================================================
--- lld/trunk/lib/Core/YamlReader.cpp (original)
+++ lld/trunk/lib/Core/YamlReader.cpp Mon Jan  9 14:18:15 2012
@@ -16,6 +16,7 @@
 
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -66,18 +67,21 @@
 class YAML {
 public:
   struct Entry {
-    Entry(const char *k, const char *v, int d, bool bd, bool bs)
+    Entry(const char *k, const char *v, std::vector<uint8_t>* vs, 
+          int d, bool bd, bool bs)
       : key(strdup(k))
-      , value(strdup(v))
+      , value(v ? strdup(v) : NULL)
+      , valueSequenceBytes(vs)
       , depth(d)
       , beginSequence(bs)
       , beginDocument(bd) {}
 
-    const char *key;
-    const char *value;
-    int         depth;
-    bool        beginSequence;
-    bool        beginDocument;
+    const char *          key;
+    const char *          value;
+    std::vector<uint8_t>* valueSequenceBytes;
+    int                   depth;
+    bool                  beginSequence;
+    bool                  beginDocument;
   };
 
   static void parse(llvm::MemoryBuffer *mb, std::vector<const Entry *>&);
@@ -107,6 +111,8 @@
   int depth = 0;
   bool nextKeyIsStartOfDocument = false;
   bool nextKeyIsStartOfSequence = false;
+  std::vector<uint8_t>* sequenceBytes = NULL;
+  unsigned contentByte = 0;
   for (const char *s = mb->getBufferStart(); s < mb->getBufferEnd(); ++s) {
     char c = *s;
     if (c == '\n')
@@ -204,7 +210,7 @@
         *p++ = c;
         state = inValue;
       } else if (c == '\n') {
-        entries.push_back(new Entry(key, "", depth,
+        entries.push_back(new Entry(key, "", NULL, depth,
                                     nextKeyIsStartOfDocument,
                                     nextKeyIsStartOfSequence));
         nextKeyIsStartOfSequence = false;
@@ -212,6 +218,8 @@
         state = inDocument;
         depth = 0;
       } else if (c == '[') {
+        contentByte = 0;
+        sequenceBytes = new std::vector<uint8_t>();
         state = inValueSequence;
       } else if (c == ' ') {
         // eat space
@@ -226,7 +234,7 @@
         *p++ = c;
       } else if (c == '\n') {
         *p = '\0';
-        entries.push_back(new Entry(key, value, depth,
+        entries.push_back(new Entry(key, value, NULL, depth,
                                     nextKeyIsStartOfDocument,
                                     nextKeyIsStartOfSequence));
         nextKeyIsStartOfSequence = false;
@@ -236,11 +244,33 @@
       }
       break;
     case inValueSequence:
-      if (c == ']')
+      if (c == ']') {
+        sequenceBytes->push_back(contentByte);
         state = inValueSequenceEnd;
+      }
+      else if (c == ' ') {
+        // eat white space
+      }
+      else if (c == ',') {
+        sequenceBytes->push_back(contentByte);
+      }
+      else if ( isdigit(c) ) {
+        contentByte = (contentByte << 4) | (c-'0');
+      } 
+      else if ( ('a' <= tolower(c)) && (tolower(c) <= 'f') ) {
+        contentByte = (contentByte << 4) | (tolower(c)-'a'+10);
+      }
+      else {
+        llvm::report_fatal_error("non-hex digit found in content [ ]");
+      }
       break;
     case inValueSequenceEnd:
       if (c == '\n') {
+        entries.push_back(new Entry(key, NULL, sequenceBytes, depth,
+                                    nextKeyIsStartOfDocument,
+                                    nextKeyIsStartOfSequence));
+        nextKeyIsStartOfSequence = false;
+        nextKeyIsStartOfDocument = false;
         state = inDocument;
         depth = 0;
       }
@@ -296,11 +326,13 @@
           , YAMLFile& f
           , const char *n
           , const char* sn
-          , uint64_t sz)
+          , uint64_t sz
+          , std::vector<uint8_t>* c)
     : Atom(ord, d, s, ct, sc, intn, md, ah, dsk, tb, al, a)
     , _file(f)
     , _name(n)
     , _sectionName(sn)
+    , _content(c)
     , _size(sz)
     , _refStartIndex(f._lastRefIndex)
     , _refEndIndex(f._references.size()) {
@@ -320,7 +352,7 @@
   }
   
   virtual llvm::StringRef customSectionName() const {
-    return _sectionName;
+    return (_sectionName ? _sectionName : llvm::StringRef());
   }
 
   virtual uint64_t objectAddress() const {
@@ -328,19 +360,26 @@
   }
 
   virtual uint64_t size() const {
-    return _size;
+    return (_content ? _content->size() : _size);
   }
 
-  virtual void copyRawContent(uint8_t buffer[]) const { }
+  llvm::ArrayRef<uint8_t> rawContent() const {  // view of this atom's bytes
+    if ( _content != NULL )  // a byte vector was handed to the constructor
+      return llvm::ArrayRef<uint8_t>(*_content);  // non-owning view of it
+    else
+      return llvm::ArrayRef<uint8_t>();  // no content: empty ArrayRef
+  }
+  
   virtual Reference::iterator referencesBegin() const;
   virtual Reference::iterator referencesEnd() const;
 private:
-  YAMLFile&      _file;
-  const char *   _name;
-  const char *   _sectionName;
-  unsigned long  _size;
-  unsigned int   _refStartIndex;
-  unsigned int   _refEndIndex;
+  YAMLFile&             _file;
+  const char *          _name;
+  const char *          _sectionName;
+  std::vector<uint8_t>* _content;
+  unsigned long         _size;
+  unsigned int          _refStartIndex;
+  unsigned int          _refEndIndex;
 };
 
 Reference::iterator YAMLAtom::referencesBegin() const {
@@ -384,6 +423,7 @@
   bool _alias;
   bool _autoHide;
   const char *_sectionName;
+  std::vector<uint8_t>* _content;
   Reference _ref;
 };
 
@@ -395,13 +435,15 @@
   , _type(KeyValues::contentTypeDefault)
   , _scope(KeyValues::scopeDefault)
   , _def(KeyValues::definitionDefault)
+  , _sectionChoice(KeyValues::sectionChoiceDefault)
   , _internalName(KeyValues::internalNameDefault)
   , _mergeDuplicates(KeyValues::mergeDuplicatesDefault)
   , _deadStrip(KeyValues::deadStripKindDefault)
   , _thumb(KeyValues::isThumbDefault)
   , _alias(KeyValues::isAliasDefault) 
   , _autoHide(KeyValues::autoHideDefault)
-  , _sectionName(NULL) {
+  , _sectionName(NULL)
+  , _content(NULL) {
   _ref.target       = NULL;
   _ref.addend       = 0;
   _ref.offsetInAtom = 0;
@@ -413,7 +455,7 @@
   Atom *a = new YAMLAtom(_ordinal, _def, _scope, _type, _sectionChoice,
                          _internalName, _mergeDuplicates, _autoHide,  
                          _deadStrip, _thumb, _alias, _align, f, 
-                         _name, _sectionName, _size);
+                         _name, _sectionName, _size, _content);
 
   f._atoms.push_back(a);
   ++_ordinal;
@@ -433,6 +475,7 @@
   _alias            = KeyValues::isAliasDefault;
   _autoHide         = KeyValues::autoHideDefault;
   _sectionName      = NULL;
+  _content          = NULL;
   _ref.target       = NULL;
   _ref.addend       = 0;
   _ref.offsetInAtom = 0;
@@ -593,7 +636,7 @@
           haveAtom = true;
         } 
         else if (strcmp(entry->key, KeyValues::contentKeyword) == 0) {
-          // TO DO: switch to content mode
+          atomState._content = entry->valueSequenceBytes;
           haveAtom = true;
         } 
         else if (strcmp(entry->key, "align2") == 0) {

Modified: lld/trunk/lib/Core/YamlWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/Core/YamlWriter.cpp?rev=147799&r1=147798&r2=147799&view=diff
==============================================================================
--- lld/trunk/lib/Core/YamlWriter.cpp (original)
+++ lld/trunk/lib/Core/YamlWriter.cpp Mon Jan  9 14:18:15 2012
@@ -15,6 +15,8 @@
 #include "lld/Core/Reference.h"
 
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/system_error.h"
@@ -141,6 +143,24 @@
     }
 
      
+    if ( atom.contentType() != Atom::typeZeroFill ) {
+      _out  << "      " 
+            << KeyValues::contentKeyword 
+            << ":"
+            << spacePadding(KeyValues::contentKeyword)
+            << "[ ";
+      llvm::ArrayRef<uint8_t> arr = atom.rawContent();
+      bool needComma = false;
+      for (unsigned int i=0; i < arr.size(); ++i) {
+        if ( needComma )
+          _out << ", ";
+        _out << hexdigit(arr[i] >> 4);
+        _out << hexdigit(arr[i] & 0x0F);
+        needComma = true;
+      }
+      _out << " ]\n";
+    }
+
     if (atom.referencesBegin() != atom.referencesEnd()) {
       _out << "      fixups:\n";
       for (Reference::iterator it = atom.referencesBegin(),
@@ -160,7 +180,12 @@
     return &spaces[strlen(key)];
   }
 
-
+  char hexdigit(uint8_t nibble) {  // 0-9 -> '0'-'9', 10-15 -> 'A'-'F'
+    if ( nibble < 0x0A )
+      return '0' + nibble;
+    else
+      return 'A' + nibble - 0x0A;  // upper-case letters; no range check
+  }
 
   llvm::raw_ostream&  _out;
   bool                _firstAtom;

Added: lld/trunk/test/cstring-coalesce.objtxt
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/cstring-coalesce.objtxt?rev=147799&view=auto
==============================================================================
--- lld/trunk/test/cstring-coalesce.objtxt (added)
+++ lld/trunk/test/cstring-coalesce.objtxt Mon Jan  9 14:18:15 2012
@@ -0,0 +1,45 @@
+# RUN: lld-core %s | FileCheck %s
+
+#
+# Test that duplicate c-strings are coalesced
+#
+
+---
+atoms:
+    - name:              L0
+      internal-name:     true
+      scope:             hidden
+      type:              c-string
+      merge-duplicates:  true
+      content:           [ 68, 65, 6c, 6c, 6f, 00 ]
+      
+    - name:              L1
+      internal-name:     true
+      scope:             hidden
+      type:              c-string
+      merge-duplicates:  true
+      content:           [ 74, 68, 65, 72, 65, 00 ]
+---
+atoms:
+    - name:              L2
+      internal-name:     true
+      scope:             hidden
+      type:              c-string
+      merge-duplicates:  true
+      content:           [ 68, 65, 6c, 6c, 6f, 00 ]
+---
+atoms:
+    - name:              L2
+      internal-name:     true
+      scope:             hidden
+      type:              c-string
+      merge-duplicates:  true
+      content:           [ 74, 68, 65, 72, 65, 00 ]
+...
+
+# CHECK:       type:       c-string
+# CHECK:       content:    [ 68, 65, 6C, 6C, 6F, 00 ]
+# CHECK:       type:       c-string
+# CHECK:       content:    [ 74, 68, 65, 72, 65, 00 ]
+# CHECK-NOT:   name:
+# CHECK:       ...





More information about the llvm-commits mailing list