[llvm] r223278 - Split the set of identified struct types into opaque and non-opaque ones.
Sanjoy Das
sanjoy at playingwithpointers.com
Mon Dec 8 20:08:20 PST 2014
Does this change mean LLVM will now structurally unique named struct types?
Asking because the LangRef says: "Literal types are uniqued
structurally, but identified types are never uniqued."
On Wed, Dec 3, 2014 at 2:36 PM, Rafael Espindola
<rafael.espindola at gmail.com> wrote:
> Author: rafael
> Date: Wed Dec 3 16:36:37 2014
> New Revision: 223278
>
> URL: http://llvm.org/viewvc/llvm-project?rev=223278&view=rev
> Log:
> Split the set of identified struct types into opaque and non-opaque ones.
>
> The non-opaque part can be structurally uniqued. To keep this to just
> a hash lookup, we don't try to unique cyclic types.
>
> Also change the type mapping algorithm to be optimistic about a type
> not being recursive and only create a new type when proven to be wrong.
> This is not as strong as trying to speculate that we can keep the source
> type, but is simpler (no speculation to revert) and more powerfull
> than what we had before (we don't copy non-recursive types at least).
>
> I initially wrote this to try to replace the name based type merging.
> It is not strong enough to replace it, but is is a useful addition.
>
> With this patch the number of named struct types is a clang lto bootstrap goes
> from 49674 to 15986.
>
> Modified:
> llvm/trunk/include/llvm/Linker/Linker.h
> llvm/trunk/lib/Linker/LinkModules.cpp
> llvm/trunk/test/Linker/type-unique-src-type.ll
>
> Modified: llvm/trunk/include/llvm/Linker/Linker.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Linker/Linker.h?rev=223278&r1=223277&r2=223278&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Linker/Linker.h (original)
> +++ llvm/trunk/include/llvm/Linker/Linker.h Wed Dec 3 16:36:37 2014
> @@ -10,7 +10,9 @@
> #ifndef LLVM_LINKER_LINKER_H
> #define LLVM_LINKER_LINKER_H
>
> -#include "llvm/ADT/SmallPtrSet.h"
> +#include "llvm/ADT/ArrayRef.h"
> +#include "llvm/ADT/DenseMap.h"
> +#include "llvm/ADT/DenseSet.h"
>
> #include <functional>
>
> @@ -18,6 +20,7 @@ namespace llvm {
> class DiagnosticInfo;
> class Module;
> class StructType;
> +class Type;
>
> /// This class provides the core functionality of linking in LLVM. It keeps a
> /// pointer to the merged module so far. It doesn't take ownership of the
> @@ -27,6 +30,40 @@ class Linker {
> public:
> typedef std::function<void(const DiagnosticInfo &)> DiagnosticHandlerFunction;
>
> + struct StructTypeKeyInfo {
> + struct KeyTy {
> + ArrayRef<Type *> ETypes;
> + bool IsPacked;
> + KeyTy(ArrayRef<Type *> E, bool P);
> + KeyTy(const StructType *ST);
> + bool operator==(const KeyTy &that) const;
> + bool operator!=(const KeyTy &that) const;
> + };
> + static StructType *getEmptyKey();
> + static StructType *getTombstoneKey();
> + static unsigned getHashValue(const KeyTy &Key);
> + static unsigned getHashValue(const StructType *ST);
> + static bool isEqual(const KeyTy &LHS, const StructType *RHS);
> + static bool isEqual(const StructType *LHS, const StructType *RHS);
> + };
> +
> + typedef DenseMap<StructType *, bool, StructTypeKeyInfo>
> + NonOpaqueStructTypeSet;
> + typedef DenseSet<StructType *> OpaqueStructTypeSet;
> +
> + struct IdentifiedStructTypeSet {
> + // The set of opaque types is the composite module.
> + OpaqueStructTypeSet OpaqueStructTypes;
> +
> + // The set of identified but non opaque structures in the composite module.
> + NonOpaqueStructTypeSet NonOpaqueStructTypes;
> +
> + void addNonOpaque(StructType *Ty);
> + void addOpaque(StructType *Ty);
> + StructType *findNonOpaque(ArrayRef<Type *> ETypes, bool IsPacked);
> + bool hasType(StructType *Ty);
> + };
> +
> Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler);
> Linker(Module *M);
> ~Linker();
> @@ -46,7 +83,9 @@ public:
> private:
> void init(Module *M, DiagnosticHandlerFunction DiagnosticHandler);
> Module *Composite;
> - SmallPtrSet<StructType *, 32> IdentifiedStructTypes;
> +
> + IdentifiedStructTypeSet IdentifiedStructTypes;
> +
> DiagnosticHandlerFunction DiagnosticHandler;
> };
>
>
> Modified: llvm/trunk/lib/Linker/LinkModules.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Linker/LinkModules.cpp?rev=223278&r1=223277&r2=223278&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Linker/LinkModules.cpp (original)
> +++ llvm/trunk/lib/Linker/LinkModules.cpp Wed Dec 3 16:36:37 2014
> @@ -13,9 +13,11 @@
>
> #include "llvm/Linker/Linker.h"
> #include "llvm-c/Linker.h"
> +#include "llvm/ADT/Hashing.h"
> #include "llvm/ADT/Optional.h"
> #include "llvm/ADT/SetVector.h"
> #include "llvm/ADT/SmallString.h"
> +#include "llvm/ADT/Statistic.h"
> #include "llvm/IR/Constants.h"
> #include "llvm/IR/DiagnosticInfo.h"
> #include "llvm/IR/DiagnosticPrinter.h"
> @@ -36,8 +38,6 @@ using namespace llvm;
> //===----------------------------------------------------------------------===//
>
> namespace {
> -typedef SmallPtrSet<StructType *, 32> TypeSet;
> -
> class TypeMapTy : public ValueMapTypeRemapper {
> /// This is a mapping from a source type to a destination type to use.
> DenseMap<Type*, Type*> MappedTypes;
> @@ -58,9 +58,10 @@ class TypeMapTy : public ValueMapTypeRem
> SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes;
>
> public:
> - TypeMapTy(TypeSet &Set) : DstStructTypesSet(Set) {}
> + TypeMapTy(Linker::IdentifiedStructTypeSet &DstStructTypesSet)
> + : DstStructTypesSet(DstStructTypesSet) {}
>
> - TypeSet &DstStructTypesSet;
> + Linker::IdentifiedStructTypeSet &DstStructTypesSet;
> /// Indicate that the specified type in the destination module is conceptually
> /// equivalent to the specified type in the source module.
> void addTypeMapping(Type *DstTy, Type *SrcTy);
> @@ -72,6 +73,9 @@ public:
> /// Return the mapped type to use for the specified input type from the
> /// source module.
> Type *get(Type *SrcTy);
> + Type *get(Type *SrcTy, SmallPtrSet<StructType *, 8> &Visited);
> +
> + void finishType(StructType *DTy, StructType *STy, ArrayRef<Type *> ETypes);
>
> FunctionType *get(FunctionType *T) {
> return cast<FunctionType>(get((Type *)T));
> @@ -225,124 +229,127 @@ void TypeMapTy::linkDefinedTypeBodies()
> DstResolvedOpaqueTypes.clear();
> }
>
> -Type *TypeMapTy::get(Type *Ty) {
> -#ifndef NDEBUG
> - for (auto &Pair : MappedTypes) {
> - assert(!(Pair.first != Ty && Pair.second == Ty) &&
> - "mapping to a source type");
> +void TypeMapTy::finishType(StructType *DTy, StructType *STy,
> + ArrayRef<Type *> ETypes) {
> + DTy->setBody(ETypes, STy->isPacked());
> +
> + // Steal STy's name.
> + if (STy->hasName()) {
> + SmallString<16> TmpName = STy->getName();
> + STy->setName("");
> + DTy->setName(TmpName);
> }
> -#endif
>
> + DstStructTypesSet.addNonOpaque(DTy);
> +}
> +
> +Type *TypeMapTy::get(Type *Ty) {
> + SmallPtrSet<StructType *, 8> Visited;
> + return get(Ty, Visited);
> +}
> +
> +Type *TypeMapTy::get(Type *Ty, SmallPtrSet<StructType *, 8> &Visited) {
> // If we already have an entry for this type, return it.
> Type **Entry = &MappedTypes[Ty];
> if (*Entry)
> return *Entry;
>
> - // If this is not a named struct type, then just map all of the elements and
> + // These are types that LLVM itself will unique.
> + bool IsUniqued = !isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral();
> +
> +#ifndef NDEBUG
> + if (!IsUniqued) {
> + for (auto &Pair : MappedTypes) {
> + assert(!(Pair.first != Ty && Pair.second == Ty) &&
> + "mapping to a source type");
> + }
> + }
> +#endif
> +
> + if (!IsUniqued && !Visited.insert(cast<StructType>(Ty)).second) {
> + StructType *DTy = StructType::create(Ty->getContext());
> + return *Entry = DTy;
> + }
> +
> + // If this is not a recursive type, then just map all of the elements and
> // then rebuild the type from inside out.
> - if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral()) {
> - // If there are no element types to map, then the type is itself. This is
> - // true for the anonymous {} struct, things like 'float', integers, etc.
> - if (Ty->getNumContainedTypes() == 0)
> - return *Entry = Ty;
> + SmallVector<Type *, 4> ElementTypes;
> +
> + // If there are no element types to map, then the type is itself. This is
> + // true for the anonymous {} struct, things like 'float', integers, etc.
> + if (Ty->getNumContainedTypes() == 0 && IsUniqued)
> + return *Entry = Ty;
> +
> + // Remap all of the elements, keeping track of whether any of them change.
> + bool AnyChange = false;
> + ElementTypes.resize(Ty->getNumContainedTypes());
> + for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) {
> + ElementTypes[I] = get(Ty->getContainedType(I), Visited);
> + AnyChange |= ElementTypes[I] != Ty->getContainedType(I);
> + }
> +
> + // If we found our type while recursively processing stuff, just use it.
> + Entry = &MappedTypes[Ty];
> + if (*Entry) {
> + if (auto *DTy = dyn_cast<StructType>(*Entry)) {
> + if (DTy->isOpaque()) {
> + auto *STy = cast<StructType>(Ty);
> + finishType(DTy, STy, ElementTypes);
> + }
> + }
> + return *Entry;
> + }
>
> - // Remap all of the elements, keeping track of whether any of them change.
> - bool AnyChange = false;
> - SmallVector<Type*, 4> ElementTypes;
> - ElementTypes.resize(Ty->getNumContainedTypes());
> - for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) {
> - ElementTypes[I] = get(Ty->getContainedType(I));
> - AnyChange |= ElementTypes[I] != Ty->getContainedType(I);
> - }
> -
> - // If we found our type while recursively processing stuff, just use it.
> - Entry = &MappedTypes[Ty];
> - if (*Entry)
> - return *Entry;
> -
> - // If all of the element types mapped directly over, then the type is usable
> - // as-is.
> - if (!AnyChange)
> + // If all of the element types mapped directly over and the type is not
> + // a nomed struct, then the type is usable as-is.
> + if (!AnyChange && IsUniqued)
> + return *Entry = Ty;
> +
> + // Otherwise, rebuild a modified type.
> + switch (Ty->getTypeID()) {
> + default:
> + llvm_unreachable("unknown derived type to remap");
> + case Type::ArrayTyID:
> + return *Entry = ArrayType::get(ElementTypes[0],
> + cast<ArrayType>(Ty)->getNumElements());
> + case Type::VectorTyID:
> + return *Entry = VectorType::get(ElementTypes[0],
> + cast<VectorType>(Ty)->getNumElements());
> + case Type::PointerTyID:
> + return *Entry = PointerType::get(ElementTypes[0],
> + cast<PointerType>(Ty)->getAddressSpace());
> + case Type::FunctionTyID:
> + return *Entry = FunctionType::get(ElementTypes[0],
> + makeArrayRef(ElementTypes).slice(1),
> + cast<FunctionType>(Ty)->isVarArg());
> + case Type::StructTyID: {
> + auto *STy = cast<StructType>(Ty);
> + bool IsPacked = STy->isPacked();
> + if (IsUniqued)
> + return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked);
> +
> + // If the type is opaque, we can just use it directly.
> + if (STy->isOpaque()) {
> + DstStructTypesSet.addOpaque(STy);
> return *Entry = Ty;
> + }
>
> - // Otherwise, rebuild a modified type.
> - switch (Ty->getTypeID()) {
> - default:
> - llvm_unreachable("unknown derived type to remap");
> - case Type::ArrayTyID:
> - return *Entry = ArrayType::get(ElementTypes[0],
> - cast<ArrayType>(Ty)->getNumElements());
> - case Type::VectorTyID:
> - return *Entry = VectorType::get(ElementTypes[0],
> - cast<VectorType>(Ty)->getNumElements());
> - case Type::PointerTyID:
> - return *Entry = PointerType::get(
> - ElementTypes[0], cast<PointerType>(Ty)->getAddressSpace());
> - case Type::FunctionTyID:
> - return *Entry = FunctionType::get(ElementTypes[0],
> - makeArrayRef(ElementTypes).slice(1),
> - cast<FunctionType>(Ty)->isVarArg());
> - case Type::StructTyID:
> - // Note that this is only reached for anonymous structs.
> - return *Entry = StructType::get(Ty->getContext(), ElementTypes,
> - cast<StructType>(Ty)->isPacked());
> - }
> - }
> -
> - // Otherwise, this is an unmapped named struct. If the struct can be directly
> - // mapped over, just use it as-is. This happens in a case when the linked-in
> - // module has something like:
> - // %T = type {%T*, i32}
> - // @GV = global %T* null
> - // where T does not exist at all in the destination module.
> - //
> - // The other case we watch for is when the type is not in the destination
> - // module, but that it has to be rebuilt because it refers to something that
> - // is already mapped. For example, if the destination module has:
> - // %A = type { i32 }
> - // and the source module has something like
> - // %A' = type { i32 }
> - // %B = type { %A'* }
> - // @GV = global %B* null
> - // then we want to create a new type: "%B = type { %A*}" and have it take the
> - // pristine "%B" name from the source module.
> - //
> - // To determine which case this is, we have to recursively walk the type graph
> - // speculating that we'll be able to reuse it unmodified. Only if this is
> - // safe would we map the entire thing over. Because this is an optimization,
> - // and is not required for the prettiness of the linked module, we just skip
> - // it and always rebuild a type here.
> - StructType *STy = cast<StructType>(Ty);
> -
> - // If the type is opaque, we can just use it directly.
> - if (STy->isOpaque()) {
> - // A named structure type from src module is used. Add it to the Set of
> - // identified structs in the destination module.
> - DstStructTypesSet.insert(STy);
> - return *Entry = STy;
> - }
> -
> - // Otherwise we create a new type.
> - StructType *DTy = StructType::create(STy->getContext());
> - // A new identified structure type was created. Add it to the set of
> - // identified structs in the destination module.
> - DstStructTypesSet.insert(DTy);
> - *Entry = DTy;
> -
> - SmallVector<Type*, 4> ElementTypes;
> - ElementTypes.resize(STy->getNumElements());
> - for (unsigned I = 0, E = ElementTypes.size(); I != E; ++I)
> - ElementTypes[I] = get(STy->getElementType(I));
> - DTy->setBody(ElementTypes, STy->isPacked());
> + if (StructType *OldT =
> + DstStructTypesSet.findNonOpaque(ElementTypes, IsPacked)) {
> + STy->setName("");
> + return *Entry = OldT;
> + }
>
> - // Steal STy's name.
> - if (STy->hasName()) {
> - SmallString<16> TmpName = STy->getName();
> - STy->setName("");
> - DTy->setName(TmpName);
> - }
> + if (!AnyChange) {
> + DstStructTypesSet.addNonOpaque(STy);
> + return *Entry = Ty;
> + }
>
> - return DTy;
> + StructType *DTy = StructType::create(Ty->getContext());
> + finishType(DTy, STy, ElementTypes);
> + return *Entry = DTy;
> + }
> + }
> }
>
> //===----------------------------------------------------------------------===//
> @@ -412,7 +419,7 @@ class ModuleLinker {
> Linker::DiagnosticHandlerFunction DiagnosticHandler;
>
> public:
> - ModuleLinker(Module *dstM, TypeSet &Set, Module *srcM,
> + ModuleLinker(Module *dstM, Linker::IdentifiedStructTypeSet &Set, Module *srcM,
> Linker::DiagnosticHandlerFunction DiagnosticHandler)
> : DstM(dstM), SrcM(srcM), TypeMap(Set),
> ValMaterializer(TypeMap, DstM, LazilyLinkFunctions),
> @@ -825,7 +832,7 @@ void ModuleLinker::computeTypeMapping()
> // we prefer to take the '%C' version. So we are then left with both
> // '%C.1' and '%C' being used for the same types. This leads to some
> // variables using one type and some using the other.
> - if (TypeMap.DstStructTypesSet.count(DST))
> + if (TypeMap.DstStructTypesSet.hasType(DST))
> TypeMap.addTypeMapping(DST, ST);
> }
>
> @@ -1589,13 +1596,101 @@ bool ModuleLinker::run() {
> return false;
> }
>
> +Linker::StructTypeKeyInfo::KeyTy::KeyTy(ArrayRef<Type *> E, bool P)
> + : ETypes(E), IsPacked(P) {}
> +
> +Linker::StructTypeKeyInfo::KeyTy::KeyTy(const StructType *ST)
> + : ETypes(ST->elements()), IsPacked(ST->isPacked()) {}
> +
> +bool Linker::StructTypeKeyInfo::KeyTy::operator==(const KeyTy &That) const {
> + if (IsPacked != That.IsPacked)
> + return false;
> + if (ETypes != That.ETypes)
> + return false;
> + return true;
> +}
> +
> +bool Linker::StructTypeKeyInfo::KeyTy::operator!=(const KeyTy &That) const {
> + return !this->operator==(That);
> +}
> +
> +StructType *Linker::StructTypeKeyInfo::getEmptyKey() {
> + return DenseMapInfo<StructType *>::getEmptyKey();
> +}
> +
> +StructType *Linker::StructTypeKeyInfo::getTombstoneKey() {
> + return DenseMapInfo<StructType *>::getTombstoneKey();
> +}
> +
> +unsigned Linker::StructTypeKeyInfo::getHashValue(const KeyTy &Key) {
> + return hash_combine(hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()),
> + Key.IsPacked);
> +}
> +
> +unsigned Linker::StructTypeKeyInfo::getHashValue(const StructType *ST) {
> + return getHashValue(KeyTy(ST));
> +}
> +
> +bool Linker::StructTypeKeyInfo::isEqual(const KeyTy &LHS,
> + const StructType *RHS) {
> + if (RHS == getEmptyKey() || RHS == getTombstoneKey())
> + return false;
> + return LHS == KeyTy(RHS);
> +}
> +
> +bool Linker::StructTypeKeyInfo::isEqual(const StructType *LHS,
> + const StructType *RHS) {
> + if (RHS == getEmptyKey())
> + return LHS == getEmptyKey();
> +
> + if (RHS == getTombstoneKey())
> + return LHS == getTombstoneKey();
> +
> + return KeyTy(LHS) == KeyTy(RHS);
> +}
> +
> +void Linker::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) {
> + assert(!Ty->isOpaque());
> + bool &Entry = NonOpaqueStructTypes[Ty];
> + Entry = true;
> +}
> +
> +void Linker::IdentifiedStructTypeSet::addOpaque(StructType *Ty) {
> + assert(Ty->isOpaque());
> + OpaqueStructTypes.insert(Ty);
> +}
> +
> +StructType *
> +Linker::IdentifiedStructTypeSet::findNonOpaque(ArrayRef<Type *> ETypes,
> + bool IsPacked) {
> + Linker::StructTypeKeyInfo::KeyTy Key(ETypes, IsPacked);
> + auto I = NonOpaqueStructTypes.find_as(Key);
> + if (I == NonOpaqueStructTypes.end())
> + return nullptr;
> + return I->first;
> +}
> +
> +bool Linker::IdentifiedStructTypeSet::hasType(StructType *Ty) {
> + if (Ty->isOpaque())
> + return OpaqueStructTypes.count(Ty);
> + auto I = NonOpaqueStructTypes.find(Ty);
> + if (I == NonOpaqueStructTypes.end())
> + return false;
> + return I->first == Ty;
> +}
> +
> void Linker::init(Module *M, DiagnosticHandlerFunction DiagnosticHandler) {
> this->Composite = M;
> this->DiagnosticHandler = DiagnosticHandler;
>
> TypeFinder StructTypes;
> StructTypes.run(*M, true);
> - IdentifiedStructTypes.insert(StructTypes.begin(), StructTypes.end());
> + for (StructType *Ty : StructTypes) {
> + if (Ty->isOpaque())
> + IdentifiedStructTypes.addOpaque(Ty);
> + else
> + IdentifiedStructTypes.addNonOpaque(Ty);
> + }
> }
>
> Linker::Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler) {
>
> Modified: llvm/trunk/test/Linker/type-unique-src-type.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Linker/type-unique-src-type.ll?rev=223278&r1=223277&r2=223278&view=diff
> ==============================================================================
> --- llvm/trunk/test/Linker/type-unique-src-type.ll (original)
> +++ llvm/trunk/test/Linker/type-unique-src-type.ll Wed Dec 3 16:36:37 2014
> @@ -9,7 +9,9 @@
> ; CHECK: %C.0 = type { %B }
> ; CHECK-NEXT: %B = type { %A }
> ; CHECK-NEXT: %A = type { i8 }
> -; CHECK-NEXT: %C = type { %B }
> +
> +; CHECK: @g1 = external global %C.0
> +; CHECK: getelementptr %C.0* null, i64 0, i32 0, i32 0
>
> %A = type { i8 }
> %B = type { %A }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list