[clang] [libcxxabi] [lldb] [llvm] [WIP (PR #115245)

Michael Buch via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 6 17:03:42 PST 2024


https://github.com/Michael137 created https://github.com/llvm/llvm-project/pull/115245

None

>From f8087e96f8d8d242a4da2e74a3259161a3bfc179 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Tue, 5 Nov 2024 00:22:07 +0000
Subject: [PATCH] Init

---
 clang/include/clang/Basic/Attr.td             |   7 +
 clang/include/clang/Basic/AttrDocs.td         |   5 +
 clang/lib/AST/Mangle.cpp                      |  16 +-
 clang/lib/Sema/SemaDeclAttr.cpp               |  11 ++
 libcxxabi/src/demangle/ItaniumDemangle.h      |   2 +
 lldb/source/Expression/IRExecutionUnit.cpp    | 181 ++++++++++++++++++
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  |  29 ++-
 .../TypeSystem/Clang/TypeSystemClang.cpp      |  12 +-
 llvm/include/llvm/Demangle/Demangle.h         |   2 +
 llvm/include/llvm/Demangle/ItaniumDemangle.h  |   2 +
 llvm/lib/Demangle/ItaniumDemangle.cpp         |  17 +-
 11 files changed, 271 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 70fad60d4edbb5..c0a4c38e479990 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -784,6 +784,13 @@ def AbiTag : Attr {
   let Documentation = [AbiTagsDocs];
 }
 
+def StructorName : Attr { // Carries a string that MangleContext::mangleName emits in place of the regular mangled name.
+    let Spellings = [Clang<"structor_name">];
+    let Args = [StringArgument<"Name">];
+    let Subjects = SubjectList<[Function], ErrorDiag>; // NOTE(review): accepts any function, yet mangleName treats every non-constructor as a destructor - confirm.
+    let Documentation = [StructorNameDocs];
+}
+
 def AddressSpace : TypeAttr {
   let Spellings = [Clang<"address_space">];
   let Args = [IntArgument<"AddressSpace">];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 546e5100b79dd9..3672d0d5019fc9 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3568,6 +3568,11 @@ manipulating bits of the enumerator when issuing warnings.
   }];
 }
 
+def StructorNameDocs : Documentation { // Documentation record referenced by the StructorName attribute.
+    let Category = DocCatDecl;
+    let Content = [{ TODO }]; // NOTE(review): placeholder; the user-facing text still has to be written.
+}
+
 def AsmLabelDocs : Documentation {
   let Category = DocCatDecl;
   let Content = [{
diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp
index 4875e8537b3c11..e193c96dd064e7 100644
--- a/clang/lib/AST/Mangle.cpp
+++ b/clang/lib/AST/Mangle.cpp
@@ -126,7 +126,7 @@ bool MangleContext::shouldMangleDeclName(const NamedDecl *D) {
 
   // Any decl can be declared with __asm("foo") on it, and this takes precedence
   // over all other naming in the .o file.
-  if (D->hasAttr<AsmLabelAttr>())
+  if (D->hasAttr<AsmLabelAttr>() || D->hasAttr<StructorNameAttr>())
     return true;
 
   // Declarations that don't have identifier names always need to be mangled.
@@ -140,6 +140,20 @@ void MangleContext::mangleName(GlobalDecl GD, raw_ostream &Out) {
   const ASTContext &ASTContext = getASTContext();
   const NamedDecl *D = cast<NamedDecl>(GD.getDecl());
 
+  if (const auto *SNA = D->getAttr<StructorNameAttr>()) {
+    Out << SNA->getName() << ':';
+
+    if (isa<CXXConstructorDecl>(D)) {
+      Out << 'C';
+      Out << GD.getCtorType();
+    } else {
+      Out << 'D';
+      Out << GD.getDtorType();
+    }
+
+    return;
+  }
+
   // Any decl can be declared with __asm("foo") on it, and this takes precedence
   // over all other naming in the .o file.
   if (const AsmLabelAttr *ALA = D->getAttr<AsmLabelAttr>()) {
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 14cc51cf89665a..21302245484cfa 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -1689,6 +1689,14 @@ static void handleIFuncAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   D->addAttr(::new (S.Context) IFuncAttr(S.Context, AL, Str));
 }
 
+static void handleStructorNameAttr(Sema &S, Decl *D, const ParsedAttr &AL) { // Attaches a StructorNameAttr built from the attribute's first argument to D.
+  StringRef Str;
+  if (!S.checkStringLiteralArgumentAttr(AL, 0, Str)) // Give up, adding nothing, when argument 0 fails the string-literal check.
+    return;
+
+  D->addAttr(::new (S.Context) StructorNameAttr(S.Context, AL, Str));
+}
+
 static void handleAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   StringRef Str;
   if (!S.checkStringLiteralArgumentAttr(AL, 0, Str))
@@ -6983,6 +6991,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
     S.HLSL().handleParamModifierAttr(D, AL);
     break;
 
+  case ParsedAttr::AT_StructorName:
+    handleStructorNameAttr(S, D, AL);
+    break;
   case ParsedAttr::AT_AbiTag:
     handleAbiTagAttr(S, D, AL);
     break;
diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h
index 3b041efe3aac00..ea32fa4827d290 100644
--- a/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -1728,6 +1728,8 @@ class CtorDtorName final : public Node {
 
   template<typename Fn> void match(Fn F) const { F(Basename, IsDtor, Variant); }
 
+  int getVariant() const { return Variant; } // Exposes the stored Variant value of this ctor/dtor name node.
+
   void printLeft(OutputBuffer &OB) const override {
     if (IsDtor)
       OB += "~";
diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp
index 15ca2ddbbae046..edcc82a397021b 100644
--- a/lldb/source/Expression/IRExecutionUnit.cpp
+++ b/lldb/source/Expression/IRExecutionUnit.cpp
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/Basic/ABI.h"
+#include "llvm/Demangle/Demangle.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/ObjectCache.h"
 #include "llvm/IR/Constants.h"
@@ -13,9 +15,14 @@
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 
+#include "Plugins/SymbolFile/DWARF/DWARFBaseDIE.h"
+#include "Plugins/SymbolFile/DWARF/DWARFDIE.h"
+#include "Plugins/SymbolFile/DWARF/SymbolFileDWARF.h"
+
 #include "lldb/Core/Debugger.h"
 #include "lldb/Core/Disassembler.h"
 #include "lldb/Core/Module.h"
@@ -36,8 +43,10 @@
 #include "lldb/Utility/LLDBAssert.h"
 #include "lldb/Utility/LLDBLog.h"
 #include "lldb/Utility/Log.h"
+#include "lldb/lldb-defines.h"
 
 #include <optional>
+#include <variant>
 
 using namespace lldb_private;
 
@@ -762,6 +771,120 @@ class LoadAddressResolver {
   lldb::addr_t m_best_internal_load_address = LLDB_INVALID_ADDRESS;
 };
 
+using namespace lldb_private::plugin::dwarf;
+
+struct StructorVariant { // Wraps either a clang constructor kind or a clang destructor kind.
+  std::variant<clang::CXXCtorType, clang::CXXDtorType> m_variant; // Holds exactly one of the two enum types.
+};
+
+static llvm::Expected<StructorVariant>
+MakeStructorVariant(llvm::StringRef variant_num) {
+  if (variant_num.consume_front("D")) {
+    std::underlying_type_t<clang::CXXDtorType> dtor_type;
+    if (variant_num.consumeInteger(10, dtor_type))
+      return llvm::createStringError("Invalid ctor variant code.");
+
+    return StructorVariant{.m_variant =
+                               static_cast<clang::CXXDtorType>(dtor_type)};
+  }
+
+  if (variant_num.consume_front("C")) {
+    std::underlying_type_t<clang::CXXCtorType> ctor_type;
+    if (variant_num.consumeInteger(10, ctor_type))
+      return llvm::createStringError("Invalid dtor variant code.");
+
+    return StructorVariant{.m_variant =
+                               static_cast<clang::CXXCtorType>(ctor_type)};
+  }
+
+  return llvm::createStringError("Incorrect structor variant prefix.");
+}
+
+static int GetItaniumVariantCode(StructorVariant structor) { // Maps a clang ctor/dtor kind to the number that is compared against the demangler's ctor/dtor variant.
+  if (auto const *ctor = std::get_if<clang::CXXCtorType>(&structor.m_variant)) { // Constructor alternative is active.
+    switch (*ctor) {
+    case clang::CXXCtorType::Ctor_Complete:
+      return 1;
+    case clang::CXXCtorType::Ctor_Base:
+      return 2;
+    default: // NOTE(review): every other enumerator is unreachable here, but the value originates from a parsed integer - confirm it cannot occur.
+      llvm_unreachable("Unimplemented");
+    }
+  } else { // Otherwise the destructor alternative must be active.
+    switch (std::get<clang::CXXDtorType>(structor.m_variant)) {
+    case clang::CXXDtorType::Dtor_Complete:
+      return 1;
+    case clang::CXXDtorType::Dtor_Base:
+      return 2;
+    default: // NOTE(review): same caveat as the constructor switch.
+      llvm_unreachable("Unimplemented");
+    }
+  }
+}
+
+// Returns the mangled name of the definition whose DW_AT_specification is
+// `die` and whose structor variant matches `structor_variant`; returns an
+// empty string when nothing matches.
+// TODO: MS-ABI; GCC-style dtor/ctor declarations; inheriting ctors; regular
+// functions.
+static std::string FindStructorLinkageName(DWARFDIE die,
+                                           StructorVariant structor_variant) {
+  auto *dwarf = die.GetDWARF();
+  assert(dwarf);
+
+  // Note, GCC only puts DW_AT_linkage_name (not DW_AT_name) on constructor
+  // decls. Will those cases still work?
+  ConstString func_name(die.GetName());
+  assert(func_name);
+
+  SymbolContextList sc_list;
+  Module::LookupInfo lookup_info(
+      func_name,
+      lldb::FunctionNameType::eFunctionNameTypeMethod |
+          lldb::FunctionNameType::eFunctionNameTypeFull,
+      lldb::LanguageType::eLanguageTypeUnknown);
+  dwarf->FindFunctions(lookup_info, {}, true, sc_list);
+
+  llvm::DenseMap<int, std::string> variants; // Demangled variant number -> mangled name.
+
+  for (auto const &sc : sc_list.SymbolContexts()) {
+    if (!sc.function)
+      continue;
+
+    auto func_die = dwarf->GetDIE(sc.function->GetID());
+    if (!func_die.IsValid())
+      continue;
+
+    auto spec_die = func_die.GetAttributeValueAsReferenceDIE(
+        llvm::dwarf::DW_AT_specification);
+    if (!spec_die.IsValid() || spec_die != die) // Keep only definitions of this exact declaration.
+      continue;
+
+    llvm::ItaniumPartialDemangler D;
+    if (D.partialDemangle(func_die.GetMangledName())) // A true result is treated as a demangling failure.
+      continue;
+
+    const auto maybe_structor_kind = D.getCtorDtorVariant();
+    if (!maybe_structor_kind) // Not a ctor/dtor mangling: skip, never deref.
+      continue;
+
+    variants.insert({*maybe_structor_kind, func_die.GetMangledName()});
+  }
+
+  auto itanium_code = GetItaniumVariantCode(structor_variant);
+  auto it = variants.find(itanium_code);
+  if (it != variants.end())
+    return it->second;
+
+  // If only C2 was emitted but we tried calling C1,
+  // we can probably (?) safely call C2.
+  if (itanium_code == 1 && variants.size() == 1)
+    if (auto retry = variants.find(2); retry != variants.end())
+      return retry->second;
+
+  return {};
+}
+
 lldb::addr_t
 IRExecutionUnit::FindInSymbols(const std::vector<ConstString> &names,
                                const lldb_private::SymbolContext &sc,
@@ -781,6 +904,64 @@ IRExecutionUnit::FindInSymbols(const std::vector<ConstString> &names,
   function_options.include_inlines = false;
 
   for (const ConstString &name : names) {
+    auto ref = name.GetStringRef();
+    if (ref.consume_front("$__lldb_func_")) {
+      uintptr_t module_ptr;
+      if (ref.consumeInteger(0, module_ptr))
+        return LLDB_INVALID_ADDRESS;
+
+      if (module_ptr == 0) {
+        // TODO: log this case. We should never be putting a null module pointer
+        // here.
+        return LLDB_INVALID_ADDRESS;
+      }
+
+      auto *mod = (lldb_private::Module *)module_ptr;
+      assert(mod);
+      auto *sym = mod->GetSymbolFile();
+      assert(sym);
+
+      if (!ref.consume_front(":"))
+        return LLDB_INVALID_ADDRESS;
+
+      lldb::user_id_t die_id;
+      if (ref.consumeInteger(10, die_id))
+        return LLDB_INVALID_ADDRESS;
+
+      auto *dwarf = llvm::dyn_cast<plugin::dwarf::SymbolFileDWARF>(sym);
+      if (!dwarf)
+        return LLDB_INVALID_ADDRESS;
+
+      auto die = dwarf->GetDIE(die_id);
+      if (!die.IsValid())
+        return LLDB_INVALID_ADDRESS;
+
+      // TODO: account for MS-ABI (where there are no ctor variants in the
+      // mangling)
+      if (!ref.consume_front(":"))
+        return LLDB_INVALID_ADDRESS;
+
+      auto structor_variant_or_err = MakeStructorVariant(ref);
+      if (!structor_variant_or_err) {
+        LLDB_LOG_ERROR(GetLog(LLDBLog::Expressions),
+                       structor_variant_or_err.takeError(),
+                       "Failed to parse structor variant encoding for {1}: {0}",
+                       name.GetStringRef());
+        return LLDB_INVALID_ADDRESS;
+      }
+
+      ConstString mangled(
+          FindStructorLinkageName(die, *structor_variant_or_err));
+
+      Module::LookupInfo lookup_info(
+          mangled, lldb::FunctionNameType::eFunctionNameTypeAny,
+          lldb::LanguageType::eLanguageTypeC_plus_plus);
+      SymbolContextList sc_list;
+      dwarf->FindFunctions(lookup_info, {}, false, sc_list);
+      if (auto load_addr = resolver.Resolve(sc_list))
+        return *load_addr;
+    }
+
     if (sc.module_sp) {
       SymbolContextList sc_list;
       sc.module_sp->FindFunctions(name, CompilerDeclContext(),
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 70540fe7fada68..9078234cf3fa35 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -1040,6 +1040,16 @@ bool DWARFASTParserClang::ParseObjCMethod(
   return true;
 }
 
+static bool IsStructorDIE(DWARFDIE const &die, DWARFDIE const &parent_die) { // True when die's name, minus a leading '~', equals parent_die's name up to its first '<'.
+  llvm::StringRef name = die.GetName();
+  llvm::StringRef parent_name = parent_die.GetName();
+
+  name.consume_front("~"); // Presumably strips the destructor tilde; a no-op when absent.
+  parent_name = parent_name.substr(0, parent_name.find('<')); // Presumably drops template arguments from the parent's name.
+
+  return name == parent_name;
+}
+
 std::pair<bool, TypeSP> DWARFASTParserClang::ParseCXXMethod(
     const DWARFDIE &die, CompilerType clang_type,
     const ParsedDWARFTypeAttributes &attrs, const DWARFDIE &decl_ctx_die,
@@ -1140,11 +1150,22 @@ std::pair<bool, TypeSP> DWARFASTParserClang::ParseCXXMethod(
   const auto accessibility =
       attrs.accessibility == eAccessNone ? eAccessPublic : attrs.accessibility;
 
+  // TODO: we should also include mangled name in identifier for
+  // better diagnostics and easier debugging when reading the
+  // expression evaluator IR.
+  std::string mangled_name;
+  if (IsStructorDIE(die, decl_ctx_die))
+    mangled_name = llvm::formatv("$__lldb_func_{0}:{1}", die.GetModule().get(),
+                                 die.GetID())
+                       .str();
+
+  char const *mangled =
+      mangled_name.empty() ? attrs.mangled_name : mangled_name.c_str();
+
   clang::CXXMethodDecl *cxx_method_decl = m_ast.AddMethodToCXXRecordType(
-      class_opaque_type.GetOpaqueQualType(), attrs.name.GetCString(),
-      attrs.mangled_name, clang_type, accessibility, attrs.is_virtual,
-      is_static, attrs.is_inline, attrs.is_explicit, is_attr_used,
-      attrs.is_artificial);
+      class_opaque_type.GetOpaqueQualType(), attrs.name.GetCString(), mangled,
+      clang_type, accessibility, attrs.is_virtual, is_static, attrs.is_inline,
+      attrs.is_explicit, is_attr_used, attrs.is_artificial);
 
   if (cxx_method_decl) {
     LinkDeclContextToDIE(cxx_method_decl, die);
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index b0f49ebf2d2cbb..f8096bfca6bf4b 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -7777,6 +7777,7 @@ clang::CXXMethodDecl *TypeSystemClang::AddMethodToCXXRecordType(
       nullptr /*expr*/, is_explicit ? clang::ExplicitSpecKind::ResolvedTrue
                                     : clang::ExplicitSpecKind::ResolvedFalse);
 
+  bool is_ctor_or_dtor = false;
   if (name.starts_with("~")) {
     cxx_dtor_decl = clang::CXXDestructorDecl::CreateDeserialized(
         getASTContext(), GlobalDeclID());
@@ -7803,6 +7804,7 @@ clang::CXXMethodDecl *TypeSystemClang::AddMethodToCXXRecordType(
     cxx_ctor_decl->setNumCtorInitializers(0);
     cxx_ctor_decl->setExplicitSpecifier(explicit_spec);
     cxx_method_decl = cxx_ctor_decl;
+    is_ctor_or_dtor = true;
   } else {
     clang::StorageClass SC = is_static ? clang::SC_Static : clang::SC_None;
     clang::OverloadedOperatorKind op_kind = clang::NUM_OVERLOADED_OPERATORS;
@@ -7826,6 +7828,7 @@ clang::CXXMethodDecl *TypeSystemClang::AddMethodToCXXRecordType(
         cxx_method_decl->setStorageClass(SC);
         cxx_method_decl->setInlineSpecified(is_inline);
         cxx_method_decl->setConstexprKind(ConstexprSpecKind::Unspecified);
+        is_ctor_or_dtor = true;
       } else if (num_params == 0) {
         // Conversion operators don't take params...
         auto *cxx_conversion_decl =
@@ -7867,8 +7870,13 @@ clang::CXXMethodDecl *TypeSystemClang::AddMethodToCXXRecordType(
     cxx_method_decl->addAttr(clang::UsedAttr::CreateImplicit(getASTContext()));
 
   if (mangled_name != nullptr) {
-    cxx_method_decl->addAttr(clang::AsmLabelAttr::CreateImplicit(
-        getASTContext(), mangled_name, /*literal=*/false));
+    if (is_ctor_or_dtor) {
+      cxx_method_decl->addAttr(clang::StructorNameAttr::CreateImplicit(
+          getASTContext(), mangled_name));
+    } else {
+      cxx_method_decl->addAttr(clang::AsmLabelAttr::CreateImplicit(
+          getASTContext(), mangled_name, /*literal=*/false));
+    }
   }
 
   // Populate the method decl with parameter decls
diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h
index fe129603c0785d..c9b61995133d04 100644
--- a/llvm/include/llvm/Demangle/Demangle.h
+++ b/llvm/include/llvm/Demangle/Demangle.h
@@ -10,6 +10,7 @@
 #define LLVM_DEMANGLE_DEMANGLE_H
 
 #include <cstddef>
+#include <optional>
 #include <string>
 #include <string_view>
 
@@ -110,6 +111,7 @@ struct ItaniumPartialDemangler {
 
   /// If this symbol describes a constructor or destructor.
   bool isCtorOrDtor() const;
+  std::optional<int> getCtorDtorVariant() const;
 
   /// If this symbol describes a function.
   bool isFunction() const;
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 0af0224bc83fa8..39389132430ab0 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -1728,6 +1728,8 @@ class CtorDtorName final : public Node {
 
   template<typename Fn> void match(Fn F) const { F(Basename, IsDtor, Variant); }
 
+  int getVariant() const { return Variant; } // Exposes the stored Variant value of this ctor/dtor name node.
+
   void printLeft(OutputBuffer &OB) const override {
     if (IsDtor)
       OB += "~";
diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp
index 5c21b06a1d0955..a1693c9fb3ffd1 100644
--- a/llvm/lib/Demangle/ItaniumDemangle.cpp
+++ b/llvm/lib/Demangle/ItaniumDemangle.cpp
@@ -548,15 +548,16 @@ bool ItaniumPartialDemangler::hasFunctionQualifiers() const {
   return E->getCVQuals() != QualNone || E->getRefQual() != FrefQualNone;
 }
 
-bool ItaniumPartialDemangler::isCtorOrDtor() const {
+std::optional<int> ItaniumPartialDemangler::getCtorDtorVariant() const {
   const Node *N = static_cast<const Node *>(RootNode);
   while (N) {
     switch (N->getKind()) {
     default:
-      return false;
-    case Node::KCtorDtorName:
-      return true;
-
+      return std::nullopt;
+    case Node::KCtorDtorName: {
+      auto const *StructorNode = static_cast<const CtorDtorName *>(N);
+      return StructorNode->getVariant();
+    }
     case Node::KAbiTagAttr:
       N = static_cast<const AbiTagAttr *>(N)->Base;
       break;
@@ -577,7 +578,11 @@ bool ItaniumPartialDemangler::isCtorOrDtor() const {
       break;
     }
   }
-  return false;
+  return std::nullopt;
+}
+
+bool ItaniumPartialDemangler::isCtorOrDtor() const { // True exactly when getCtorDtorVariant() yields a value.
+  return getCtorDtorVariant().has_value();
 }
 
 bool ItaniumPartialDemangler::isFunction() const {



More information about the llvm-commits mailing list