[llvm] r340663 - Add data structure to form equivalence classes of mangled names.

Richard Smith via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 24 15:31:52 PDT 2018


Author: rsmith
Date: Fri Aug 24 15:31:51 2018
New Revision: 340663

URL: http://llvm.org/viewvc/llvm-project?rev=340663&view=rev
Log:
Add data structure to form equivalence classes of mangled names.

Summary:
Given a set of equivalent name fragments, this mechanism determines whether two
mangled names are equivalent. The intent is to use this for fuzzy matching of
profile data against the program after certain refactorings are performed.

Reviewers: erik.pilkington, dlj

Subscribers: mgorny, llvm-commits

Differential Revision: https://reviews.llvm.org/D50935

Added:
    llvm/trunk/include/llvm/Support/ItaniumManglingCanonicalizer.h
    llvm/trunk/lib/Support/ItaniumManglingCanonicalizer.cpp
    llvm/trunk/unittests/Support/ItaniumManglingCanonicalizerTest.cpp
Modified:
    llvm/trunk/include/llvm/Demangle/ItaniumDemangle.h
    llvm/trunk/lib/Support/CMakeLists.txt
    llvm/trunk/unittests/Support/CMakeLists.txt

Modified: llvm/trunk/include/llvm/Demangle/ItaniumDemangle.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Demangle/ItaniumDemangle.h?rev=340663&r1=340662&r2=340663&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Demangle/ItaniumDemangle.h (original)
+++ llvm/trunk/include/llvm/Demangle/ItaniumDemangle.h Fri Aug 24 15:31:51 2018
@@ -2160,7 +2160,7 @@ struct Db {
     ASTAllocator.reset();
   }
 
-  template <class T, class... Args> T *make(Args &&... args) {
+  template <class T, class... Args> Node *make(Args &&... args) {
     return ASTAllocator.template makeNode<T>(std::forward<Args>(args)...);
   }
 
@@ -4948,8 +4948,11 @@ template<typename Alloc> Node *Db<Alloc>
   // <template-arg> further ahead in the mangled name (currently just conversion
   // operator types), then we should only look it up in the right context.
   if (PermitForwardTemplateReferences) {
-    ForwardTemplateRefs.push_back(make<ForwardTemplateReference>(Index));
-    return ForwardTemplateRefs.back();
+    Node *ForwardRef = make<ForwardTemplateReference>(Index);
+    assert(ForwardRef->getKind() == Node::KForwardTemplateReference);
+    ForwardTemplateRefs.push_back(
+        static_cast<ForwardTemplateReference *>(ForwardRef));
+    return ForwardRef;
   }
 
   if (Index >= TemplateParams.size())

Added: llvm/trunk/include/llvm/Support/ItaniumManglingCanonicalizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/ItaniumManglingCanonicalizer.h?rev=340663&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Support/ItaniumManglingCanonicalizer.h (added)
+++ llvm/trunk/include/llvm/Support/ItaniumManglingCanonicalizer.h Fri Aug 24 15:31:51 2018
@@ -0,0 +1,87 @@
+//===--- ItaniumManglingCanonicalizer.h -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a class for computing equivalence classes of mangled names
+// given a set of equivalences between name fragments.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
+#define LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
+
+#include "llvm/ADT/StringRef.h"
+#include <cstddef>
+#include <cstdint>
+
+namespace llvm {
+/// Canonicalizer for mangled names.
+///
+/// This class allows specifying a list of "equivalent" manglings. For example,
+/// you can specify that Ss is equivalent to
+///   NSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEE
+/// and then manglings that refer to libstdc++'s 'std::string' will be
+/// considered equivalent to manglings that are the same except that they refer
+/// to libc++'s 'std::string'.
+///
+/// This can be used when data (e.g., profiling data) is available for a version
+/// of a program built in a different configuration, with correspondingly
+/// different manglings.
+class ItaniumManglingCanonicalizer {
+public:
+  ItaniumManglingCanonicalizer();
+  ItaniumManglingCanonicalizer(const ItaniumManglingCanonicalizer &) = delete;
+  void operator=(const ItaniumManglingCanonicalizer &) = delete;
+  ~ItaniumManglingCanonicalizer();
+
+  enum class EquivalenceError {
+    Success,
+
+    /// Both of the equivalent manglings have already been used as components of
+    /// some other mangling we've looked at. It's too late to add this
+    /// equivalence.
+    ManglingAlreadyUsed,
+
+    /// The first equivalent mangling is invalid.
+    InvalidFirstMangling,
+
+    /// The second equivalent mangling is invalid.
+    InvalidSecondMangling,
+  };
+
+  enum class FragmentKind {
+    /// The mangling fragment is a <name> (or a predefined <substitution>).
+    Name,
+    /// The mangling fragment is a <type>.
+    Type,
+    /// The mangling fragment is an <encoding>.
+    Encoding,
+  };
+
+  /// Add an equivalence between \p First and \p Second. Both manglings must
+  /// live at least as long as the canonicalizer.
+  EquivalenceError addEquivalence(FragmentKind Kind, StringRef First,
+                                  StringRef Second);
+
+  using Key = uintptr_t;
+
+  /// Form a canonical key for the specified mangling. The key will be the
+  /// same for all equivalent manglings, and different for any two
+  /// non-equivalent manglings, but is otherwise unspecified.
+  ///
+  /// Returns Key() if (and only if) the mangling is not a valid Itanium C++
+  /// ABI mangling.
+  Key canonicalize(StringRef Mangling);
+
+private:
+  struct Impl;
+  Impl *P;
+};
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H

Modified: llvm/trunk/lib/Support/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CMakeLists.txt?rev=340663&r1=340662&r2=340663&view=diff
==============================================================================
--- llvm/trunk/lib/Support/CMakeLists.txt (original)
+++ llvm/trunk/lib/Support/CMakeLists.txt Fri Aug 24 15:31:51 2018
@@ -83,6 +83,7 @@ add_llvm_library(LLVMSupport
   InitLLVM.cpp
   IntEqClasses.cpp
   IntervalMap.cpp
+  ItaniumManglingCanonicalizer.cpp
   JamCRC.cpp
   JSON.cpp
   KnownBits.cpp

Added: llvm/trunk/lib/Support/ItaniumManglingCanonicalizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/ItaniumManglingCanonicalizer.cpp?rev=340663&view=auto
==============================================================================
--- llvm/trunk/lib/Support/ItaniumManglingCanonicalizer.cpp (added)
+++ llvm/trunk/lib/Support/ItaniumManglingCanonicalizer.cpp Fri Aug 24 15:31:51 2018
@@ -0,0 +1,307 @@
+//===----------------- ItaniumManglingCanonicalizer.cpp -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ItaniumManglingCanonicalizer.h"
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Demangle/ItaniumDemangle.h"
+#include "llvm/Support/Allocator.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+using llvm::itanium_demangle::ForwardTemplateReference;
+using llvm::itanium_demangle::Node;
+using llvm::itanium_demangle::NodeKind;
+
+namespace {
+struct FoldingSetNodeIDBuilder {
+  llvm::FoldingSetNodeID &ID;
+  void operator()(const Node *P) { ID.AddPointer(P); }
+  void operator()(StringView Str) {
+    ID.AddString(llvm::StringRef(Str.begin(), Str.size()));
+  }
+  template<typename T>
+  typename std::enable_if<std::is_integral<T>::value ||
+                          std::is_enum<T>::value>::type
+  operator()(T V) {
+    ID.AddInteger((unsigned long long)V);
+  }
+  void operator()(itanium_demangle::NodeOrString NS) {
+    if (NS.isNode()) {
+      ID.AddInteger(0);
+      (*this)(NS.asNode());
+    } else if (NS.isString()) {
+      ID.AddInteger(1);
+      (*this)(NS.asString());
+    } else {
+      ID.AddInteger(2);
+    }
+  }
+  void operator()(itanium_demangle::NodeArray A) {
+    ID.AddInteger(A.size());
+    for (const Node *N : A)
+      (*this)(N);
+  }
+};
+
+template<typename ...T>
+void profileCtor(llvm::FoldingSetNodeID &ID, Node::Kind K, T ...V) {
+  FoldingSetNodeIDBuilder Builder = {ID};
+  Builder(K);
+  int VisitInOrder[] = {
+    (Builder(V), 0) ...,
+    0 // Avoid empty array if there are no arguments.
+  };
+  (void)VisitInOrder;
+}
+
+// FIXME: Convert this to a generic lambda when possible.
+template<typename NodeT> struct ProfileSpecificNode {
+  FoldingSetNodeID &ID;
+  template<typename ...T> void operator()(T ...V) {
+    profileCtor(ID, NodeKind<NodeT>::Kind, V...);
+  }
+};
+
+struct ProfileNode {
+  FoldingSetNodeID &ID;
+  template<typename NodeT> void operator()(const NodeT *N) {
+    N->match(ProfileSpecificNode<NodeT>{ID});
+  }
+};
+
+template<> void ProfileNode::operator()(const ForwardTemplateReference *N) {
+  llvm_unreachable("should never canonicalize a ForwardTemplateReference");
+} // Forward references are allocated outside the folding set, so never profiled.
+
+void profileNode(llvm::FoldingSetNodeID &ID, const Node *N) {
+  N->visit(ProfileNode{ID});
+}
+
+class FoldingNodeAllocator {
+  class alignas(alignof(Node *)) NodeHeader : public llvm::FoldingSetNode {
+  public:
+    // 'Node' in this context names the injected-class-name of the base class.
+    itanium_demangle::Node *getNode() {
+      return reinterpret_cast<itanium_demangle::Node *>(this + 1);
+    }
+    void Profile(llvm::FoldingSetNodeID &ID) { profileNode(ID, getNode()); }
+  };
+
+  BumpPtrAllocator RawAlloc;
+  llvm::FoldingSet<NodeHeader> Nodes;
+
+public:
+  void reset() {}
+
+  template<typename T, typename ...Args>
+  std::pair<Node*, bool> getOrCreateNode(Args &&...As) {
+    llvm::FoldingSetNodeID ID;
+    profileCtor(ID, NodeKind<T>::Kind, As...);
+
+    void *InsertPos;
+    if (NodeHeader *Existing = Nodes.FindNodeOrInsertPos(ID, InsertPos))
+      return {static_cast<T*>(Existing->getNode()), false};
+
+    static_assert(alignof(T) <= alignof(NodeHeader),
+                  "underaligned node header for specific node kind");
+    void *Storage =
+        RawAlloc.Allocate(sizeof(NodeHeader) + sizeof(T), alignof(NodeHeader));
+    NodeHeader *New = new (Storage) NodeHeader;
+    T *Result = new (New->getNode()) T(std::forward<Args>(As)...);
+    Nodes.InsertNode(New, InsertPos);
+    return {Result, true};
+  }
+
+  template<typename T, typename... Args>
+  Node *makeNode(Args &&...As) {
+    return getOrCreateNode<T>(std::forward<Args>(As)...).first;
+  }
+
+  void *allocateNodeArray(size_t sz) {
+    return RawAlloc.Allocate(sizeof(Node *) * sz, alignof(Node *));
+  }
+};
+
+// FIXME: Don't canonicalize forward template references for now, because they
+// contain state (the resolved template node) that's not known at their point
+// of creation.
+template<>
+std::pair<Node *, bool>
+FoldingNodeAllocator::getOrCreateNode<ForwardTemplateReference>(size_t &Index) {
+  return {new (RawAlloc.Allocate(sizeof(ForwardTemplateReference),
+                                 alignof(ForwardTemplateReference)))
+              ForwardTemplateReference(Index),
+          true};
+}
+
+class CanonicalizerAllocator : public FoldingNodeAllocator {
+  Node *MostRecentlyCreated = nullptr;
+  Node *TrackedNode = nullptr;
+  bool TrackedNodeIsUsed = false;
+  llvm::SmallDenseMap<Node*, Node*, 32> Remappings;
+
+  template<typename T, typename ...Args> Node *makeNodeSimple(Args &&...As) {
+    std::pair<Node *, bool> Result =
+        getOrCreateNode<T>(std::forward<Args>(As)...);
+    if (Result.second) {
+      // Node is new. Make a note of that.
+      MostRecentlyCreated = Result.first;
+    } else {
+      // Node is pre-existing; check if it's in our remapping table.
+      if (auto *N = Remappings.lookup(Result.first)) {
+        Result.first = N;
+        assert(Remappings.find(Result.first) == Remappings.end() &&
+               "should never need multiple remap steps");
+      }
+      if (Result.first == TrackedNode)
+        TrackedNodeIsUsed = true;
+    }
+    return Result.first;
+  }
+
+  /// Helper to allow makeNode to be partially-specialized on T.
+  template<typename T> struct MakeNodeImpl {
+    CanonicalizerAllocator &Self;
+    template<typename ...Args> Node *make(Args &&...As) {
+      return Self.makeNodeSimple<T>(std::forward<Args>(As)...);
+    }
+  };
+
+public:
+  template<typename T, typename ...Args> Node *makeNode(Args &&...As) {
+    return MakeNodeImpl<T>{*this}.make(std::forward<Args>(As)...);
+  }
+
+  void reset() { MostRecentlyCreated = nullptr; }
+
+  void addRemapping(Node *A, Node *B) {
+    // Note, we don't need to check whether B is also remapped, because if it
+    // was we would have already remapped it when building it.
+    Remappings.insert(std::make_pair(A, B));
+  }
+
+  bool isMostRecentlyCreated(Node *N) const { return MostRecentlyCreated == N; }
+
+  void trackUsesOf(Node *N) {
+    TrackedNode = N;
+    TrackedNodeIsUsed = false;
+  }
+  bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; }
+};
+
+/// Convert St3foo to NSt3fooE so that equivalences naming one also affect the
+/// other.
+template<>
+struct CanonicalizerAllocator::MakeNodeImpl<
+           itanium_demangle::StdQualifiedName> {
+  CanonicalizerAllocator &Self;
+  Node *make(Node *Child) {
+    Node *StdNamespace = Self.makeNode<itanium_demangle::NameType>("std");
+    if (!StdNamespace)
+      return nullptr;
+    return Self.makeNode<itanium_demangle::NestedName>(StdNamespace, Child);
+  }
+};
+
+// FIXME: Also expand built-in substitutions?
+
+using CanonicalizingDemangler = itanium_demangle::Db<CanonicalizerAllocator>;
+}
+
+struct ItaniumManglingCanonicalizer::Impl {
+  CanonicalizingDemangler Demangler = {nullptr, nullptr};
+};
+
+ItaniumManglingCanonicalizer::ItaniumManglingCanonicalizer() : P(new Impl) {}
+ItaniumManglingCanonicalizer::~ItaniumManglingCanonicalizer() { delete P; }
+
+ItaniumManglingCanonicalizer::EquivalenceError
+ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First,
+                                             StringRef Second) {
+  auto &Alloc = P->Demangler.ASTAllocator;
+
+  auto Parse = [&](StringRef Str) {
+    P->Demangler.reset(Str.begin(), Str.end());
+    Node *N = nullptr;
+    switch (Kind) {
+      // A <name>, with minor extensions to allow arbitrary namespace and
+      // template names that can't easily be written as <name>s.
+    case FragmentKind::Name:
+      // Very special case: allow "St" as a shorthand for "3std". It's not
+      // valid as a <name> mangling, but is nonetheless the most natural
+      // way to name the 'std' namespace.
+      if (Str.size() == 2 && P->Demangler.consumeIf("St"))
+        N = P->Demangler.make<itanium_demangle::NameType>("std");
+      // We permit substitutions to name templates without their template
+      // arguments. This mostly just falls out, as almost all template names
+      // are valid as <name>s, but we also want to parse <substitution>s as
+      // <name>s, even though they're not.
+      else if (Str.startswith("S"))
+        // Parse the substitution and optional following template arguments.
+        N = P->Demangler.parseType();
+      else
+        N = P->Demangler.parseName();
+      break;
+
+      // A <type>.
+    case FragmentKind::Type:
+      N = P->Demangler.parseType();
+      break;
+
+      // An <encoding>.
+    case FragmentKind::Encoding:
+      N = P->Demangler.parseEncoding();
+      break;
+    }
+
+    // If we have trailing junk, the mangling is invalid.
+    if (P->Demangler.numLeft() != 0)
+      N = nullptr;
+
+    // If any node was created after N, then we cannot safely remap it because
+    // it might already be in use by another node.
+    return std::make_pair(N, Alloc.isMostRecentlyCreated(N));
+  };
+
+  Node *FirstNode, *SecondNode;
+  bool FirstIsNew, SecondIsNew;
+
+  std::tie(FirstNode, FirstIsNew) = Parse(First);
+  if (!FirstNode)
+    return EquivalenceError::InvalidFirstMangling;
+
+  Alloc.trackUsesOf(FirstNode);
+  std::tie(SecondNode, SecondIsNew) = Parse(Second);
+  if (!SecondNode)
+    return EquivalenceError::InvalidSecondMangling;
+
+  // If they're already equivalent, there's nothing to do.
+  if (FirstNode == SecondNode)
+    return EquivalenceError::Success;
+
+  if (FirstIsNew && !Alloc.trackedNodeIsUsed())
+    Alloc.addRemapping(FirstNode, SecondNode);
+  else if (SecondIsNew)
+    Alloc.addRemapping(SecondNode, FirstNode);
+  else
+    return EquivalenceError::ManglingAlreadyUsed;
+
+  return EquivalenceError::Success;
+}
+
+ItaniumManglingCanonicalizer::Key
+ItaniumManglingCanonicalizer::canonicalize(StringRef Mangling) {
+  P->Demangler.reset(Mangling.begin(), Mangling.end());
+  return reinterpret_cast<Key>(P->Demangler.parse());
+}

Modified: llvm/trunk/unittests/Support/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/CMakeLists.txt?rev=340663&r1=340662&r2=340663&view=diff
==============================================================================
--- llvm/trunk/unittests/Support/CMakeLists.txt (original)
+++ llvm/trunk/unittests/Support/CMakeLists.txt Fri Aug 24 15:31:51 2018
@@ -31,6 +31,7 @@ add_llvm_unittest(SupportTests
   FormatVariadicTest.cpp
   GlobPatternTest.cpp
   Host.cpp
+  ItaniumManglingCanonicalizerTest.cpp
   JSONTest.cpp
   LEB128Test.cpp
   LineIteratorTest.cpp

Added: llvm/trunk/unittests/Support/ItaniumManglingCanonicalizerTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/ItaniumManglingCanonicalizerTest.cpp?rev=340663&view=auto
==============================================================================
--- llvm/trunk/unittests/Support/ItaniumManglingCanonicalizerTest.cpp (added)
+++ llvm/trunk/unittests/Support/ItaniumManglingCanonicalizerTest.cpp Fri Aug 24 15:31:51 2018
@@ -0,0 +1,315 @@
+//===-------------- ItaniumManglingCanonicalizerTest.cpp ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdlib>
+#include "llvm/Support/ItaniumManglingCanonicalizer.h"
+#include "gtest/gtest.h"
+
+using EquivalenceError = llvm::ItaniumManglingCanonicalizer::EquivalenceError;
+using FragmentKind = llvm::ItaniumManglingCanonicalizer::FragmentKind;
+
+struct Equivalence {
+  FragmentKind Kind;
+  llvm::StringRef First;
+  llvm::StringRef Second;
+};
+
+// A set of manglings that should all be considered equivalent.
+using EquivalenceClass = std::initializer_list<llvm::StringRef>;
+
+struct Testcase {
+  // A set of equivalences to register.
+  std::initializer_list<Equivalence> Equivalences;
+  // A set of distinct equivalence classes created by registering the
+  // equivalences.
+  std::initializer_list<EquivalenceClass> Classes;
+};
+
+static std::initializer_list<Testcase> Testcases = {
+  // Three different manglings for std::string (old libstdc++, new libstdc++,
+  // libc++).
+  {
+    {
+      {FragmentKind::Type, "Ss",
+       "NSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEE"},
+      {FragmentKind::Type, "Ss",
+       "NSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE"},
+    },
+    {
+      {"_Z1fv"},
+      {"_Z1fSs",
+       "_Z1fNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEE",
+       "_Z1fNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE"},
+      {"_ZNKSs4sizeEv",
+       "_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE4sizeEv",
+       "_ZNKSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE4sizeEv"},
+    }
+  },
+
+  // Check that substitutions are properly handled.
+  {
+    {
+      // ::X <-> ::N::X<int>
+      {FragmentKind::Type, "1X", "N1N1XIiEE"},
+      // ::T<T<int, int>, T<int, int>> <-> T<int>
+      {FragmentKind::Type, "1TIS_IiiES0_E", "1TIiE"},
+      // A::B::foo <-> AB::foo
+      {FragmentKind::Name, "N1A1B3fooE", "N2AB3fooE"},
+    },
+    {
+      {"_Z1f1XPS_RS_", "_Z1fN1N1XIiEEPS1_RS1_"},
+      {"_ZN1A1B3fooE1TIS1_IiiES2_EPS3_RS3_", "_ZN2AB3fooE1TIiEPS1_RS1_"},
+    }
+  },
+
+  // Check that nested equivalences are properly handled.
+  {
+    {
+      // std::__1::char_traits == std::__cxx11::char_traits
+      // (Note that this is unused and should make no difference,
+      // but it should not cause us to fail to match up the cases
+      // below.)
+      {FragmentKind::Name,
+       "NSt3__111char_traitsE",
+       "NSt7__cxx1111char_traitsE"},
+      // std::__1::allocator == std::allocator
+      {FragmentKind::Name,
+       "NSt3__19allocatorE",
+       "Sa"}, // "Sa" is not strictly a <name> but we accept it as one.
+      // std::__1::vector == std::vector
+      {FragmentKind::Name,
+       "St6vector",
+       "NSt3__16vectorE"},
+      // std::__1::basic_string<
+      //   char
+      //   std::__1::char_traits<char>,
+      //   std::__1::allocator<char>> ==
+      // std::__cxx11::basic_string<
+      //   char,
+      //   std::char_traits<char>,
+      //   std::allocator<char>>
+      {FragmentKind::Type,
+       "NSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEE",
+       "NSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE"},
+      // X<A> <-> X<B>
+      {FragmentKind::Type, "1XI1AE", "1XI1BE"},
+      // X <-> Y
+      {FragmentKind::Name, "1X", "1Y"},
+    },
+    {
+      // f(std::string)
+      {"_Z1fNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEE",
+       "_Z1fNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE"},
+      // f(std::vector<int>)
+      {"_Z1fSt6vectorIiSaIiEE", "_Z1fNSt3__16vectorIiNS_9allocatorIiEEEE"},
+      // f(X<A>), f(X<B>), f(Y<A>), f(Y<B>)
+      {"_Z1f1XI1AE", "_Z1f1XI1BE", "_Z1f1YI1AE", "_Z1f1YI1BE"},
+      // f(X<C>), f(Y<C>)
+      {"_Z1f1XI1CE", "_Z1f1YI1CE"},
+    }
+  },
+
+  // Check namespace equivalences.
+  {
+    {
+      // std::__1 == std::__cxx11
+      {FragmentKind::Name, "St3__1", "St7__cxx11"},
+      // std::__1::allocator == std::allocator
+      {FragmentKind::Name, "NSt3__19allocatorE", "Sa"},
+      // std::vector == std::__1::vector
+      {FragmentKind::Name, "St6vector", "NSt3__16vectorE"},
+      // std::__cxx11::char_traits == std::char_traits
+      // (This indirectly means that std::__1::char_traits == std::char_traits,
+      // due to the std::__cxx11 == std::__1 equivalence, which is what we rely
+      // on below.)
+      {FragmentKind::Name, "NSt7__cxx1111char_traitsE", "St11char_traits"},
+    },
+    {
+      // f(std::foo)
+      {"_Z1fNSt7__cxx113fooE",
+       "_Z1fNSt3__13fooE"},
+      // f(std::string)
+      {"_Z1fNSt7__cxx1111char_traitsIcEE",
+       "_Z1fNSt3__111char_traitsIcEE",
+       "_Z1fSt11char_traitsIcE"},
+      // f(std::char_traits<char>)
+      {"_Z1fNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEE",
+       "_Z1fNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE"},
+      // f(std::vector<int>)
+      {"_Z1fSt6vectorIiSaIiEE", "_Z1fNSt3__16vectorIiNS_9allocatorIiEEEE"},
+    }
+  },
+
+  // Check namespace equivalences for namespace 'std'. We support using 'St'
+  // for this, despite it not technically being a <name>.
+  {
+    {
+      // std::__1 == std
+      {FragmentKind::Name, "St3__1", "St"},
+      // std::__1 == std::__cxx11
+      {FragmentKind::Name, "St3__1", "St7__cxx11"},
+      // FIXME: Should a 'std' equivalence also cover the predefined
+      // substitutions?
+      // std::__1::allocator == std::allocator
+      {FragmentKind::Name, "NSt3__19allocatorE", "Sa"},
+    },
+    {
+      {"_Z1fSt3foo", "_Z1fNSt3__13fooE", "_Z1fNSt7__cxx113fooE"},
+      {"_Z1fNSt3bar3bazE", "_Z1fNSt3__13bar3bazE"},
+      // f(std::string)
+      {"_Z1fNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEE",
+       "_Z1fNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE"},
+      // f(std::vector<int>)
+      {"_Z1fSt6vectorIiSaIiEE", "_Z1fNSt3__16vectorIiNS_9allocatorIiEEEE"},
+    }
+  },
+
+  // Check mutually-recursive equivalences.
+  {
+    {
+      {FragmentKind::Type, "1A", "1B"},
+      {FragmentKind::Type, "1A", "1C"},
+      {FragmentKind::Type, "1D", "1B"},
+      {FragmentKind::Type, "1C", "1E"},
+    },
+    {
+      {"_Z1f1A", "_Z1f1B", "_Z1f1C", "_Z1f1D", "_Z1f1E"},
+      {"_Z1f1F"},
+    }
+  },
+
+  // Check <encoding>s.
+  {
+    {
+      {FragmentKind::Encoding, "1fv", "1gv"},
+    },
+    {
+      // f(void) -> g(void)
+      {"_Z1fv", "_Z1gv"},
+      // static local 'n' in f(void) -> static local 'n' in g(void)
+      {"_ZZ1fvE1n", "_ZZ1gvE1n"},
+    }
+  },
+
+  // Corner case: the substitution can appear within its own expansion.
+  {
+    {
+      // X <-> Y<X>
+      {FragmentKind::Type, "1X", "1YI1XE"},
+      // A<B> <-> B
+      {FragmentKind::Type, "1AI1BE", "1B"},
+    },
+    {
+      // f(X) == f(Y<X>) == f(Y<Y<X>>) == f(Y<Y<Y<X>>>)
+      {"_Z1f1X", "_Z1f1YI1XE", "_Z1f1YIS_I1XEE", "_Z1f1YIS_IS_I1XEEE"},
+      // f(B) == f(A<B>) == f(A<A<B>>) == f(A<A<A<B>>>)
+      {"_Z1f1B", "_Z1f1AI1BE", "_Z1f1AIS_I1BEE", "_Z1f1AIS_IS_I1BEEE"},
+    }
+  },
+
+  // Redundant equivalences are accepted (and have no effect).
+  {
+    {
+      {FragmentKind::Name, "3std", "St"},
+      {FragmentKind::Name, "1X", "1Y"},
+      {FragmentKind::Name, "N1X1ZE", "N1Y1ZE"},
+    },
+    {}
+  },
+
+  // ForwardTemplateReference does not support canonicalization.
+  // FIXME: We should consider ways of fixing this, perhaps by eliminating
+  // the ForwardTemplateReference node with a tree transformation.
+  {
+    {
+      // X::operator T() <with T = A> == Y::operator T() <with T = A>
+      {FragmentKind::Encoding, "N1XcvT_I1AEEv", "N1YcvT_I1AEEv"},
+      // A == B
+      {FragmentKind::Name, "1A", "1B"},
+    },
+    {
+      // All combinations result in unique equivalence classes.
+      {"_ZN1XcvT_I1AEEv"},
+      {"_ZN1XcvT_I1BEEv"},
+      {"_ZN1YcvT_I1AEEv"},
+      {"_ZN1YcvT_I1BEEv"},
+      // Even giving the same string twice gives a new class.
+      {"_ZN1XcvT_I1AEEv"},
+    }
+  },
+};
+
+TEST(ItaniumManglingCanonicalizerTest, TestTestcases) {
+  for (const auto &Testcase : Testcases) {
+    llvm::ItaniumManglingCanonicalizer Canonicalizer;
+    for (const auto &Equiv : Testcase.Equivalences) {
+      auto Result =
+          Canonicalizer.addEquivalence(Equiv.Kind, Equiv.First, Equiv.Second);
+      EXPECT_EQ(Result, EquivalenceError::Success)
+          << "couldn't add equivalence between " << Equiv.First << " and "
+          << Equiv.Second;
+    }
+
+    using CanonKey = llvm::ItaniumManglingCanonicalizer::Key;
+    std::map<CanonKey, llvm::StringRef> Found;
+    for (const auto &Class : Testcase.Classes) {
+      CanonKey ClassKey = {};
+      for (llvm::StringRef Str : Class) {
+        CanonKey ThisKey = Canonicalizer.canonicalize(Str);
+        EXPECT_NE(ThisKey, CanonKey()) << "couldn't canonicalize " << Str;
+        if (ClassKey) {
+          EXPECT_EQ(ThisKey, ClassKey)
+              << Str << " not in the same class as " << *Class.begin();
+        } else {
+          ClassKey = ThisKey;
+        }
+      }
+      EXPECT_TRUE(Found.insert({ClassKey, *Class.begin()}).second)
+          << *Class.begin() << " is in the same class as " << Found[ClassKey];
+    }
+  }
+}
+
+TEST(ItaniumManglingCanonicalizerTest, TestInvalidManglings) {
+  llvm::ItaniumManglingCanonicalizer Canonicalizer;
+  EXPECT_EQ(Canonicalizer.addEquivalence(FragmentKind::Type, "", "1X"),
+            EquivalenceError::InvalidFirstMangling);
+  EXPECT_EQ(Canonicalizer.addEquivalence(FragmentKind::Type, "1X", "1ab"),
+            EquivalenceError::InvalidSecondMangling);
+  EXPECT_EQ(Canonicalizer.canonicalize("_Z3fooE"),
+            llvm::ItaniumManglingCanonicalizer::Key());
+  EXPECT_EQ(Canonicalizer.canonicalize("foo"),
+            llvm::ItaniumManglingCanonicalizer::Key());
+
+  // A reference to a template parameter ('T_' etc) cannot appear in a <name>,
+  // because we don't have template arguments to bind to it. (The arguments in
+  // an 'I ... E' construct in the <name> aren't registered as
+  // backreferenceable arguments in this sense, because they're not part of
+  // the template argument list of an <encoding>.)
+  EXPECT_EQ(Canonicalizer.addEquivalence(FragmentKind::Name, "N1XcvT_I1AEE",
+                                         "1f"),
+            EquivalenceError::InvalidFirstMangling);
+}
+
+TEST(ItaniumManglingCanonicalizerTest, TestBadEquivalenceOrder) {
+  llvm::ItaniumManglingCanonicalizer Canonicalizer;
+  EXPECT_EQ(Canonicalizer.addEquivalence(FragmentKind::Type, "N1P1XE", "N1Q1XE"),
+            EquivalenceError::Success);
+  EXPECT_EQ(Canonicalizer.addEquivalence(FragmentKind::Type, "1P", "1Q"),
+            EquivalenceError::ManglingAlreadyUsed);
+
+  EXPECT_EQ(Canonicalizer.addEquivalence(FragmentKind::Type, "N1C1XE", "N1A1YE"),
+            EquivalenceError::Success);
+  EXPECT_EQ(Canonicalizer.addEquivalence(FragmentKind::Type, "1A", "1B"),
+            EquivalenceError::Success);
+  EXPECT_EQ(Canonicalizer.addEquivalence(FragmentKind::Type, "1C", "1D"),
+            EquivalenceError::Success);
+  EXPECT_EQ(Canonicalizer.addEquivalence(FragmentKind::Type, "1B", "1D"),
+            EquivalenceError::ManglingAlreadyUsed);
+}




More information about the llvm-commits mailing list