[llvm] r223278 - Split the set of identified struct types into opaque and non-opaque ones.

Rafael Espindola rafael.espindola at gmail.com
Wed Dec 3 14:36:38 PST 2014


Author: rafael
Date: Wed Dec  3 16:36:37 2014
New Revision: 223278

URL: http://llvm.org/viewvc/llvm-project?rev=223278&view=rev
Log:
Split the set of identified struct types into opaque and non-opaque ones.

The non-opaque part can be structurally uniqued. To keep this to just
a hash lookup, we don't try to unique cyclic types.

Also change the type mapping algorithm to be optimistic about a type
not being recursive and only create a new type when proven to be wrong.
This is not as strong as trying to speculate that we can keep the source
type, but is simpler (no speculation to revert) and more powerfull
than what we had before (we don't copy non-recursive types at least).

I initially wrote this to try to replace the name based type merging.
It is not strong enough to replace it, but is is a useful addition.

With this patch the number of named struct types is a clang lto bootstrap goes
from 49674 to 15986.

Modified:
    llvm/trunk/include/llvm/Linker/Linker.h
    llvm/trunk/lib/Linker/LinkModules.cpp
    llvm/trunk/test/Linker/type-unique-src-type.ll

Modified: llvm/trunk/include/llvm/Linker/Linker.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Linker/Linker.h?rev=223278&r1=223277&r2=223278&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Linker/Linker.h (original)
+++ llvm/trunk/include/llvm/Linker/Linker.h Wed Dec  3 16:36:37 2014
@@ -10,7 +10,9 @@
 #ifndef LLVM_LINKER_LINKER_H
 #define LLVM_LINKER_LINKER_H
 
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 
 #include <functional>
 
@@ -18,6 +20,7 @@ namespace llvm {
 class DiagnosticInfo;
 class Module;
 class StructType;
+class Type;
 
 /// This class provides the core functionality of linking in LLVM. It keeps a
 /// pointer to the merged module so far. It doesn't take ownership of the
@@ -27,6 +30,40 @@ class Linker {
 public:
   typedef std::function<void(const DiagnosticInfo &)> DiagnosticHandlerFunction;
 
+  struct StructTypeKeyInfo {
+    struct KeyTy {
+      ArrayRef<Type *> ETypes;
+      bool IsPacked;
+      KeyTy(ArrayRef<Type *> E, bool P);
+      KeyTy(const StructType *ST);
+      bool operator==(const KeyTy &that) const;
+      bool operator!=(const KeyTy &that) const;
+    };
+    static StructType *getEmptyKey();
+    static StructType *getTombstoneKey();
+    static unsigned getHashValue(const KeyTy &Key);
+    static unsigned getHashValue(const StructType *ST);
+    static bool isEqual(const KeyTy &LHS, const StructType *RHS);
+    static bool isEqual(const StructType *LHS, const StructType *RHS);
+  };
+
+  typedef DenseMap<StructType *, bool, StructTypeKeyInfo>
+      NonOpaqueStructTypeSet;
+  typedef DenseSet<StructType *> OpaqueStructTypeSet;
+
+  struct IdentifiedStructTypeSet {
+    // The set of opaque types is the composite module.
+    OpaqueStructTypeSet OpaqueStructTypes;
+
+    // The set of identified but non opaque structures in the composite module.
+    NonOpaqueStructTypeSet NonOpaqueStructTypes;
+
+    void addNonOpaque(StructType *Ty);
+    void addOpaque(StructType *Ty);
+    StructType *findNonOpaque(ArrayRef<Type *> ETypes, bool IsPacked);
+    bool hasType(StructType *Ty);
+  };
+
   Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler);
   Linker(Module *M);
   ~Linker();
@@ -46,7 +83,9 @@ public:
 private:
   void init(Module *M, DiagnosticHandlerFunction DiagnosticHandler);
   Module *Composite;
-  SmallPtrSet<StructType *, 32> IdentifiedStructTypes;
+
+  IdentifiedStructTypeSet IdentifiedStructTypes;
+
   DiagnosticHandlerFunction DiagnosticHandler;
 };
 

Modified: llvm/trunk/lib/Linker/LinkModules.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Linker/LinkModules.cpp?rev=223278&r1=223277&r2=223278&view=diff
==============================================================================
--- llvm/trunk/lib/Linker/LinkModules.cpp (original)
+++ llvm/trunk/lib/Linker/LinkModules.cpp Wed Dec  3 16:36:37 2014
@@ -13,9 +13,11 @@
 
 #include "llvm/Linker/Linker.h"
 #include "llvm-c/Linker.h"
+#include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
@@ -36,8 +38,6 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 namespace {
-typedef SmallPtrSet<StructType *, 32> TypeSet;
-
 class TypeMapTy : public ValueMapTypeRemapper {
   /// This is a mapping from a source type to a destination type to use.
   DenseMap<Type*, Type*> MappedTypes;
@@ -58,9 +58,10 @@ class TypeMapTy : public ValueMapTypeRem
   SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes;
 
 public:
-  TypeMapTy(TypeSet &Set) : DstStructTypesSet(Set) {}
+  TypeMapTy(Linker::IdentifiedStructTypeSet &DstStructTypesSet)
+      : DstStructTypesSet(DstStructTypesSet) {}
 
-  TypeSet &DstStructTypesSet;
+  Linker::IdentifiedStructTypeSet &DstStructTypesSet;
   /// Indicate that the specified type in the destination module is conceptually
   /// equivalent to the specified type in the source module.
   void addTypeMapping(Type *DstTy, Type *SrcTy);
@@ -72,6 +73,9 @@ public:
   /// Return the mapped type to use for the specified input type from the
   /// source module.
   Type *get(Type *SrcTy);
+  Type *get(Type *SrcTy, SmallPtrSet<StructType *, 8> &Visited);
+
+  void finishType(StructType *DTy, StructType *STy, ArrayRef<Type *> ETypes);
 
   FunctionType *get(FunctionType *T) {
     return cast<FunctionType>(get((Type *)T));
@@ -225,124 +229,127 @@ void TypeMapTy::linkDefinedTypeBodies()
   DstResolvedOpaqueTypes.clear();
 }
 
-Type *TypeMapTy::get(Type *Ty) {
-#ifndef NDEBUG
-  for (auto &Pair : MappedTypes) {
-    assert(!(Pair.first != Ty && Pair.second == Ty) &&
-           "mapping to a source type");
+void TypeMapTy::finishType(StructType *DTy, StructType *STy,
+                           ArrayRef<Type *> ETypes) {
+  DTy->setBody(ETypes, STy->isPacked());
+
+  // Steal STy's name.
+  if (STy->hasName()) {
+    SmallString<16> TmpName = STy->getName();
+    STy->setName("");
+    DTy->setName(TmpName);
   }
-#endif
 
+  DstStructTypesSet.addNonOpaque(DTy);
+}
+
+Type *TypeMapTy::get(Type *Ty) {
+  SmallPtrSet<StructType *, 8> Visited;
+  return get(Ty, Visited);
+}
+
+Type *TypeMapTy::get(Type *Ty, SmallPtrSet<StructType *, 8> &Visited) {
   // If we already have an entry for this type, return it.
   Type **Entry = &MappedTypes[Ty];
   if (*Entry)
     return *Entry;
 
-  // If this is not a named struct type, then just map all of the elements and
+  // These are types that LLVM itself will unique.
+  bool IsUniqued = !isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral();
+
+#ifndef NDEBUG
+  if (!IsUniqued) {
+    for (auto &Pair : MappedTypes) {
+      assert(!(Pair.first != Ty && Pair.second == Ty) &&
+             "mapping to a source type");
+    }
+  }
+#endif
+
+  if (!IsUniqued && !Visited.insert(cast<StructType>(Ty)).second) {
+    StructType *DTy = StructType::create(Ty->getContext());
+    return *Entry = DTy;
+  }
+
+  // If this is not a recursive type, then just map all of the elements and
   // then rebuild the type from inside out.
-  if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral()) {
-    // If there are no element types to map, then the type is itself.  This is
-    // true for the anonymous {} struct, things like 'float', integers, etc.
-    if (Ty->getNumContainedTypes() == 0)
-      return *Entry = Ty;
+  SmallVector<Type *, 4> ElementTypes;
+
+  // If there are no element types to map, then the type is itself.  This is
+  // true for the anonymous {} struct, things like 'float', integers, etc.
+  if (Ty->getNumContainedTypes() == 0 && IsUniqued)
+    return *Entry = Ty;
+
+  // Remap all of the elements, keeping track of whether any of them change.
+  bool AnyChange = false;
+  ElementTypes.resize(Ty->getNumContainedTypes());
+  for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) {
+    ElementTypes[I] = get(Ty->getContainedType(I), Visited);
+    AnyChange |= ElementTypes[I] != Ty->getContainedType(I);
+  }
+
+  // If we found our type while recursively processing stuff, just use it.
+  Entry = &MappedTypes[Ty];
+  if (*Entry) {
+    if (auto *DTy = dyn_cast<StructType>(*Entry)) {
+      if (DTy->isOpaque()) {
+        auto *STy = cast<StructType>(Ty);
+        finishType(DTy, STy, ElementTypes);
+      }
+    }
+    return *Entry;
+  }
 
-    // Remap all of the elements, keeping track of whether any of them change.
-    bool AnyChange = false;
-    SmallVector<Type*, 4> ElementTypes;
-    ElementTypes.resize(Ty->getNumContainedTypes());
-    for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) {
-      ElementTypes[I] = get(Ty->getContainedType(I));
-      AnyChange |= ElementTypes[I] != Ty->getContainedType(I);
-    }
-
-    // If we found our type while recursively processing stuff, just use it.
-    Entry = &MappedTypes[Ty];
-    if (*Entry)
-      return *Entry;
-
-    // If all of the element types mapped directly over, then the type is usable
-    // as-is.
-    if (!AnyChange)
+  // If all of the element types mapped directly over and the type is not
+  // a nomed struct, then the type is usable as-is.
+  if (!AnyChange && IsUniqued)
+    return *Entry = Ty;
+
+  // Otherwise, rebuild a modified type.
+  switch (Ty->getTypeID()) {
+  default:
+    llvm_unreachable("unknown derived type to remap");
+  case Type::ArrayTyID:
+    return *Entry = ArrayType::get(ElementTypes[0],
+                                   cast<ArrayType>(Ty)->getNumElements());
+  case Type::VectorTyID:
+    return *Entry = VectorType::get(ElementTypes[0],
+                                    cast<VectorType>(Ty)->getNumElements());
+  case Type::PointerTyID:
+    return *Entry = PointerType::get(ElementTypes[0],
+                                     cast<PointerType>(Ty)->getAddressSpace());
+  case Type::FunctionTyID:
+    return *Entry = FunctionType::get(ElementTypes[0],
+                                      makeArrayRef(ElementTypes).slice(1),
+                                      cast<FunctionType>(Ty)->isVarArg());
+  case Type::StructTyID: {
+    auto *STy = cast<StructType>(Ty);
+    bool IsPacked = STy->isPacked();
+    if (IsUniqued)
+      return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked);
+
+    // If the type is opaque, we can just use it directly.
+    if (STy->isOpaque()) {
+      DstStructTypesSet.addOpaque(STy);
       return *Entry = Ty;
+    }
 
-    // Otherwise, rebuild a modified type.
-    switch (Ty->getTypeID()) {
-    default:
-      llvm_unreachable("unknown derived type to remap");
-    case Type::ArrayTyID:
-      return *Entry = ArrayType::get(ElementTypes[0],
-                                     cast<ArrayType>(Ty)->getNumElements());
-    case Type::VectorTyID:
-      return *Entry = VectorType::get(ElementTypes[0],
-                                      cast<VectorType>(Ty)->getNumElements());
-    case Type::PointerTyID:
-      return *Entry = PointerType::get(
-                 ElementTypes[0], cast<PointerType>(Ty)->getAddressSpace());
-    case Type::FunctionTyID:
-      return *Entry = FunctionType::get(ElementTypes[0],
-                                        makeArrayRef(ElementTypes).slice(1),
-                                        cast<FunctionType>(Ty)->isVarArg());
-    case Type::StructTyID:
-      // Note that this is only reached for anonymous structs.
-      return *Entry = StructType::get(Ty->getContext(), ElementTypes,
-                                      cast<StructType>(Ty)->isPacked());
-    }
-  }
-
-  // Otherwise, this is an unmapped named struct.  If the struct can be directly
-  // mapped over, just use it as-is.  This happens in a case when the linked-in
-  // module has something like:
-  //   %T = type {%T*, i32}
-  //   @GV = global %T* null
-  // where T does not exist at all in the destination module.
-  //
-  // The other case we watch for is when the type is not in the destination
-  // module, but that it has to be rebuilt because it refers to something that
-  // is already mapped.  For example, if the destination module has:
-  //  %A = type { i32 }
-  // and the source module has something like
-  //  %A' = type { i32 }
-  //  %B = type { %A'* }
-  //  @GV = global %B* null
-  // then we want to create a new type: "%B = type { %A*}" and have it take the
-  // pristine "%B" name from the source module.
-  //
-  // To determine which case this is, we have to recursively walk the type graph
-  // speculating that we'll be able to reuse it unmodified.  Only if this is
-  // safe would we map the entire thing over.  Because this is an optimization,
-  // and is not required for the prettiness of the linked module, we just skip
-  // it and always rebuild a type here.
-  StructType *STy = cast<StructType>(Ty);
-
-  // If the type is opaque, we can just use it directly.
-  if (STy->isOpaque()) {
-    // A named structure type from src module is used. Add it to the Set of
-    // identified structs in the destination module.
-    DstStructTypesSet.insert(STy);
-    return *Entry = STy;
-  }
-
-  // Otherwise we create a new type.
-  StructType *DTy = StructType::create(STy->getContext());
-  // A new identified structure type was created. Add it to the set of
-  // identified structs in the destination module.
-  DstStructTypesSet.insert(DTy);
-  *Entry = DTy;
-
-  SmallVector<Type*, 4> ElementTypes;
-  ElementTypes.resize(STy->getNumElements());
-  for (unsigned I = 0, E = ElementTypes.size(); I != E; ++I)
-    ElementTypes[I] = get(STy->getElementType(I));
-  DTy->setBody(ElementTypes, STy->isPacked());
+    if (StructType *OldT =
+            DstStructTypesSet.findNonOpaque(ElementTypes, IsPacked)) {
+      STy->setName("");
+      return *Entry = OldT;
+    }
 
-  // Steal STy's name.
-  if (STy->hasName()) {
-    SmallString<16> TmpName = STy->getName();
-    STy->setName("");
-    DTy->setName(TmpName);
-  }
+    if (!AnyChange) {
+      DstStructTypesSet.addNonOpaque(STy);
+      return *Entry = Ty;
+    }
 
-  return DTy;
+    StructType *DTy = StructType::create(Ty->getContext());
+    finishType(DTy, STy, ElementTypes);
+    return *Entry = DTy;
+  }
+  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -412,7 +419,7 @@ class ModuleLinker {
   Linker::DiagnosticHandlerFunction DiagnosticHandler;
 
 public:
-  ModuleLinker(Module *dstM, TypeSet &Set, Module *srcM,
+  ModuleLinker(Module *dstM, Linker::IdentifiedStructTypeSet &Set, Module *srcM,
                Linker::DiagnosticHandlerFunction DiagnosticHandler)
       : DstM(dstM), SrcM(srcM), TypeMap(Set),
         ValMaterializer(TypeMap, DstM, LazilyLinkFunctions),
@@ -825,7 +832,7 @@ void ModuleLinker::computeTypeMapping()
     // we prefer to take the '%C' version. So we are then left with both
     // '%C.1' and '%C' being used for the same types. This leads to some
     // variables using one type and some using the other.
-    if (TypeMap.DstStructTypesSet.count(DST))
+    if (TypeMap.DstStructTypesSet.hasType(DST))
       TypeMap.addTypeMapping(DST, ST);
   }
 
@@ -1589,13 +1596,101 @@ bool ModuleLinker::run() {
   return false;
 }
 
+Linker::StructTypeKeyInfo::KeyTy::KeyTy(ArrayRef<Type *> E, bool P)
+    : ETypes(E), IsPacked(P) {}
+
+Linker::StructTypeKeyInfo::KeyTy::KeyTy(const StructType *ST)
+    : ETypes(ST->elements()), IsPacked(ST->isPacked()) {}
+
+bool Linker::StructTypeKeyInfo::KeyTy::operator==(const KeyTy &That) const {
+  if (IsPacked != That.IsPacked)
+    return false;
+  if (ETypes != That.ETypes)
+    return false;
+  return true;
+}
+
+bool Linker::StructTypeKeyInfo::KeyTy::operator!=(const KeyTy &That) const {
+  return !this->operator==(That);
+}
+
+StructType *Linker::StructTypeKeyInfo::getEmptyKey() {
+  return DenseMapInfo<StructType *>::getEmptyKey();
+}
+
+StructType *Linker::StructTypeKeyInfo::getTombstoneKey() {
+  return DenseMapInfo<StructType *>::getTombstoneKey();
+}
+
+unsigned Linker::StructTypeKeyInfo::getHashValue(const KeyTy &Key) {
+  return hash_combine(hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()),
+                      Key.IsPacked);
+}
+
+unsigned Linker::StructTypeKeyInfo::getHashValue(const StructType *ST) {
+  return getHashValue(KeyTy(ST));
+}
+
+bool Linker::StructTypeKeyInfo::isEqual(const KeyTy &LHS,
+                                        const StructType *RHS) {
+  if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+    return false;
+  return LHS == KeyTy(RHS);
+}
+
+bool Linker::StructTypeKeyInfo::isEqual(const StructType *LHS,
+                                        const StructType *RHS) {
+  if (RHS == getEmptyKey())
+    return LHS == getEmptyKey();
+
+  if (RHS == getTombstoneKey())
+    return LHS == getTombstoneKey();
+
+  return KeyTy(LHS) == KeyTy(RHS);
+}
+
+void Linker::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) {
+  assert(!Ty->isOpaque());
+  bool &Entry = NonOpaqueStructTypes[Ty];
+  Entry = true;
+}
+
+void Linker::IdentifiedStructTypeSet::addOpaque(StructType *Ty) {
+  assert(Ty->isOpaque());
+  OpaqueStructTypes.insert(Ty);
+}
+
+StructType *
+Linker::IdentifiedStructTypeSet::findNonOpaque(ArrayRef<Type *> ETypes,
+                                               bool IsPacked) {
+  Linker::StructTypeKeyInfo::KeyTy Key(ETypes, IsPacked);
+  auto I = NonOpaqueStructTypes.find_as(Key);
+  if (I == NonOpaqueStructTypes.end())
+    return nullptr;
+  return I->first;
+}
+
+bool Linker::IdentifiedStructTypeSet::hasType(StructType *Ty) {
+  if (Ty->isOpaque())
+    return OpaqueStructTypes.count(Ty);
+  auto I = NonOpaqueStructTypes.find(Ty);
+  if (I == NonOpaqueStructTypes.end())
+    return false;
+  return I->first == Ty;
+}
+
 void Linker::init(Module *M, DiagnosticHandlerFunction DiagnosticHandler) {
   this->Composite = M;
   this->DiagnosticHandler = DiagnosticHandler;
 
   TypeFinder StructTypes;
   StructTypes.run(*M, true);
-  IdentifiedStructTypes.insert(StructTypes.begin(), StructTypes.end());
+  for (StructType *Ty : StructTypes) {
+    if (Ty->isOpaque())
+      IdentifiedStructTypes.addOpaque(Ty);
+    else
+      IdentifiedStructTypes.addNonOpaque(Ty);
+  }
 }
 
 Linker::Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler) {

Modified: llvm/trunk/test/Linker/type-unique-src-type.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Linker/type-unique-src-type.ll?rev=223278&r1=223277&r2=223278&view=diff
==============================================================================
--- llvm/trunk/test/Linker/type-unique-src-type.ll (original)
+++ llvm/trunk/test/Linker/type-unique-src-type.ll Wed Dec  3 16:36:37 2014
@@ -9,7 +9,9 @@
 ; CHECK: %C.0 = type { %B }
 ; CHECK-NEXT: %B = type { %A }
 ; CHECK-NEXT: %A = type { i8 }
-; CHECK-NEXT: %C = type { %B }
+
+; CHECK: @g1 = external global %C.0
+; CHECK:  getelementptr %C.0* null, i64 0, i32 0, i32 0
 
 %A   = type { i8 }
 %B   = type { %A }





More information about the llvm-commits mailing list