[clang] Improve sort mangled (PR #67066)

Matheus Izvekov via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 21 14:33:38 PDT 2023


https://github.com/mizvekov created https://github.com/llvm/llvm-project/pull/67066

This is stacked on top of https://github.com/llvm/llvm-project/pull/66816
Disregard first commit.

>From 47bd926491f28f722c429f3ab4a993aa5d85d1b7 Mon Sep 17 00:00:00 2001
From: Matheus Izvekov <mizvekov at gmail.com>
Date: Tue, 19 Sep 2023 22:21:25 +0200
Subject: [PATCH 1/2] -fsanitize=function: fix MSVC hashing to sugared type

Hashing the sugared type instead of the canonical type meant that
a simple example like this would always fail under MSVC:

```
static auto l() {}
int main() {
  auto a = l;
  a();
}
```
`clang --target=x86_64-pc-windows-msvc -fno-exceptions -fsanitize=function -g -O0 -fuse-ld=lld -o test.exe test.cc`

produces:
```
test.cc:4:3: runtime error: call to function l through pointer to incorrect function type 'void (*)()'
```
---
 clang/include/clang/AST/Mangle.h              | 4 ++--
 clang/lib/AST/Expr.cpp                        | 2 +-
 clang/lib/AST/ItaniumMangle.cpp               | 8 ++++----
 clang/lib/AST/MicrosoftMangle.cpp             | 8 ++++----
 clang/lib/CodeGen/CGBlocks.cpp                | 2 +-
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 2 +-
 clang/lib/CodeGen/CGVTables.cpp               | 5 +++--
 clang/lib/CodeGen/CodeGenFunction.cpp         | 2 +-
 clang/lib/CodeGen/CodeGenModule.cpp           | 4 ++--
 clang/lib/CodeGen/CodeGenTBAA.cpp             | 4 ++--
 clang/test/CodeGen/ubsan-function-sugared.cpp | 3 +--
 clang/unittests/AST/DeclTest.cpp              | 4 ++--
 12 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/clang/include/clang/AST/Mangle.h b/clang/include/clang/AST/Mangle.h
index c04bcc7f01cb4e6..e586b0cec43dfd6 100644
--- a/clang/include/clang/AST/Mangle.h
+++ b/clang/include/clang/AST/Mangle.h
@@ -178,8 +178,8 @@ class MangleContext {
   /// or type uniquing.
   /// TODO: Extend this to internal types by generating names that are unique
   /// across translation units so it can be used with LTO.
-  virtual void mangleTypeName(QualType T, raw_ostream &,
-                              bool NormalizeIntegers = false) = 0;
+  virtual void mangleCanonicalTypeName(QualType T, raw_ostream &,
+                                       bool NormalizeIntegers = false) = 0;
 
   /// @}
 };
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 4f3837371b3fc5e..f94482206b738d5 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -659,7 +659,7 @@ std::string SYCLUniqueStableNameExpr::ComputeName(ASTContext &Context,
   std::string Buffer;
   Buffer.reserve(128);
   llvm::raw_string_ostream Out(Buffer);
-  Ctx->mangleTypeName(Ty, Out);
+  Ctx->mangleCanonicalTypeName(Ty, Out);
 
   return Out.str();
 }
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 3bbc0a5767ff49d..87c3f233ddd7bfd 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -112,8 +112,8 @@ class ItaniumMangleContextImpl : public ItaniumMangleContext {
   void mangleCXXRTTI(QualType T, raw_ostream &) override;
   void mangleCXXRTTIName(QualType T, raw_ostream &,
                          bool NormalizeIntegers) override;
-  void mangleTypeName(QualType T, raw_ostream &,
-                      bool NormalizeIntegers) override;
+  void mangleCanonicalTypeName(QualType T, raw_ostream &,
+                               bool NormalizeIntegers) override;
 
   void mangleCXXCtorComdat(const CXXConstructorDecl *D, raw_ostream &) override;
   void mangleCXXDtorComdat(const CXXDestructorDecl *D, raw_ostream &) override;
@@ -7041,8 +7041,8 @@ void ItaniumMangleContextImpl::mangleCXXRTTIName(
   Mangler.mangleType(Ty);
 }
 
-void ItaniumMangleContextImpl::mangleTypeName(QualType Ty, raw_ostream &Out,
-                                              bool NormalizeIntegers = false) {
+void ItaniumMangleContextImpl::mangleCanonicalTypeName(
+    QualType Ty, raw_ostream &Out, bool NormalizeIntegers = false) {
   mangleCXXRTTIName(Ty, Out, NormalizeIntegers);
 }
 
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index 79175c79de96bf8..c1320cbdf0e6385 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -196,8 +196,8 @@ class MicrosoftMangleContextImpl : public MicrosoftMangleContext {
   mangleCXXRTTICompleteObjectLocator(const CXXRecordDecl *Derived,
                                      ArrayRef<const CXXRecordDecl *> BasePath,
                                      raw_ostream &Out) override;
-  void mangleTypeName(QualType T, raw_ostream &,
-                      bool NormalizeIntegers) override;
+  void mangleCanonicalTypeName(QualType T, raw_ostream &,
+                               bool NormalizeIntegers) override;
   void mangleReferenceTemporary(const VarDecl *, unsigned ManglingNumber,
                                 raw_ostream &) override;
   void mangleStaticGuardVariable(const VarDecl *D, raw_ostream &Out) override;
@@ -3838,13 +3838,13 @@ void MicrosoftMangleContextImpl::mangleSEHFinallyBlock(
   Mangler.mangleName(EnclosingDecl);
 }
 
-void MicrosoftMangleContextImpl::mangleTypeName(
+void MicrosoftMangleContextImpl::mangleCanonicalTypeName(
     QualType T, raw_ostream &Out, bool NormalizeIntegers = false) {
   // This is just a made up unique string for the purposes of tbaa.  undname
   // does *not* know how to demangle it.
   MicrosoftCXXNameMangler Mangler(*this, Out);
   Mangler.getStream() << '?';
-  Mangler.mangleType(T, SourceRange());
+  Mangler.mangleType(T.getCanonicalType(), SourceRange());
 }
 
 void MicrosoftMangleContextImpl::mangleReferenceTemporary(
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index 4f64012fc1a5c39..afa9156ceaffbbd 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -1716,7 +1716,7 @@ static std::string getBlockCaptureStr(const CGBlockInfo::Capture &Cap,
     Str += "c";
     SmallString<256> TyStr;
     llvm::raw_svector_ostream Out(TyStr);
-    CGM.getCXXABI().getMangleContext().mangleTypeName(CaptureTy, Out);
+    CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(CaptureTy, Out);
     Str += llvm::to_string(TyStr.size()) + TyStr.c_str();
     break;
   }
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 92b7c8d4aa546f0..c5096201ad95353 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9069,7 +9069,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
   SmallString<64> TyStr;
   llvm::raw_svector_ostream Out(TyStr);
-  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
+  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                     Name, &CGM.getModule());
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index 23cfcdd138439f0..a430764f9985c21 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -1316,6 +1316,7 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
                                 AP.second.AddressPointIndex));
 
   // Sort the address points for determinism.
+  // FIXME: It's more efficient to mangle the types before sorting.
   llvm::sort(AddressPoints, [this](const AddressPoint &AP1,
                                    const AddressPoint &AP2) {
     if (&AP1 == &AP2)
@@ -1323,13 +1324,13 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
 
     std::string S1;
     llvm::raw_string_ostream O1(S1);
-    getCXXABI().getMangleContext().mangleTypeName(
+    getCXXABI().getMangleContext().mangleCanonicalTypeName(
         QualType(AP1.first->getTypeForDecl(), 0), O1);
     O1.flush();
 
     std::string S2;
     llvm::raw_string_ostream O2(S2);
-    getCXXABI().getMangleContext().mangleTypeName(
+    getCXXABI().getMangleContext().mangleCanonicalTypeName(
         QualType(AP2.first->getTypeForDecl(), 0), O2);
     O2.flush();
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index bf83171e2c68146..b76ca56d1c3940b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -576,7 +576,7 @@ CodeGenFunction::getUBSanFunctionTypeHash(QualType Ty) const {
     Ty = getContext().getFunctionTypeWithExceptionSpec(Ty, EST_None);
   std::string Mangled;
   llvm::raw_string_ostream Out(Mangled);
-  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out, false);
+  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out, false);
   return llvm::ConstantInt::get(
       CGM.Int32Ty, static_cast<uint32_t>(llvm::xxh3_64bits(Mangled)));
 }
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 8b0c9340775cbe9..da0e471c8d06e3e 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1999,7 +1999,7 @@ llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) {
 
   std::string OutName;
   llvm::raw_string_ostream Out(OutName);
-  getCXXABI().getMangleContext().mangleTypeName(
+  getCXXABI().getMangleContext().mangleCanonicalTypeName(
       T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers);
 
   if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers)
@@ -7199,7 +7199,7 @@ CodeGenModule::CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map,
   if (isExternallyVisible(T->getLinkage())) {
     std::string OutName;
     llvm::raw_string_ostream Out(OutName);
-    getCXXABI().getMangleContext().mangleTypeName(
+    getCXXABI().getMangleContext().mangleCanonicalTypeName(
         T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers);
 
     if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers)
diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 395ed7b1d703a5e..8705d3d65f1a573 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -205,7 +205,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
 
     SmallString<256> OutName;
     llvm::raw_svector_ostream Out(OutName);
-    MContext.mangleTypeName(QualType(ETy, 0), Out);
+    MContext.mangleCanonicalTypeName(QualType(ETy, 0), Out);
     return createScalarTypeNode(OutName, getChar(), Size);
   }
 
@@ -391,7 +391,7 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
     if (Features.CPlusPlus) {
       // Don't use the mangler for C code.
       llvm::raw_svector_ostream Out(OutName);
-      MContext.mangleTypeName(QualType(Ty, 0), Out);
+      MContext.mangleCanonicalTypeName(QualType(Ty, 0), Out);
     } else {
       OutName = RD->getName();
     }
diff --git a/clang/test/CodeGen/ubsan-function-sugared.cpp b/clang/test/CodeGen/ubsan-function-sugared.cpp
index 238bf31f4aba6d1..fb2487c024ba903 100644
--- a/clang/test/CodeGen/ubsan-function-sugared.cpp
+++ b/clang/test/CodeGen/ubsan-function-sugared.cpp
@@ -40,5 +40,4 @@ void caller() {
 }
 
 // GNU:  ![[FUNCSAN]] = !{i32 -1056584962, i32 905068220}
-// FIXME: Wrong hash
-// MSVC: ![[FUNCSAN]] = !{i32 -1056584962, i32 165986058}
+// MSVC: ![[FUNCSAN]] = !{i32 -1056584962, i32 -1600339357}
diff --git a/clang/unittests/AST/DeclTest.cpp b/clang/unittests/AST/DeclTest.cpp
index d2977b0cb55b64a..474d3dadb5a52c6 100644
--- a/clang/unittests/AST/DeclTest.cpp
+++ b/clang/unittests/AST/DeclTest.cpp
@@ -135,8 +135,8 @@ TEST(Decl, MangleDependentSizedArray) {
   std::unique_ptr<ItaniumMangleContext> MC(
       ItaniumMangleContext::create(Ctx, Diags));
 
-  MC->mangleTypeName(DeclA->getType(), OS_A);
-  MC->mangleTypeName(DeclB->getType(), OS_B);
+  MC->mangleCanonicalTypeName(DeclA->getType(), OS_A);
+  MC->mangleCanonicalTypeName(DeclB->getType(), OS_B);
 
   ASSERT_TRUE(0 == MangleA.compare("_ZTSA_i"));
   ASSERT_TRUE(0 == MangleB.compare("_ZTSAT0__T_"));

>From b8a4c98c60b187c9ccbbe6cc8cdb0af8a0b2af28 Mon Sep 17 00:00:00 2001
From: Matheus Izvekov <mizvekov at gmail.com>
Date: Thu, 21 Sep 2023 22:57:27 +0200
Subject: [PATCH 2/2] [NFC][Clang][CodeGen] Improve performance for vtable
 metadata generation

Mangle each AddressPoint once, instead of once per comparison.
---
 clang/lib/CodeGen/CGVTables.cpp | 54 ++++++++++++++-------------------
 1 file changed, 22 insertions(+), 32 deletions(-)

diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index a430764f9985c21..54df3dd4297a804 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include <algorithm>
 #include <cstdio>
+#include <utility>
 
 using namespace clang;
 using namespace CodeGen;
@@ -1308,44 +1309,33 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
 
   CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType());
 
-  typedef std::pair<const CXXRecordDecl *, unsigned> AddressPoint;
+  struct AddressPoint {
+    const CXXRecordDecl *Base;
+    size_t Offset;
+    std::string TypeName;
+    bool operator<(const AddressPoint &RHS) const {
+      int D = TypeName.compare(RHS.TypeName);
+      return D < 0 || (D == 0 && Offset < RHS.Offset);
+    }
+  };
   std::vector<AddressPoint> AddressPoints;
-  for (auto &&AP : VTLayout.getAddressPoints())
-    AddressPoints.push_back(std::make_pair(
-        AP.first.getBase(), VTLayout.getVTableOffset(AP.second.VTableIndex) +
-                                AP.second.AddressPointIndex));
-
-  // Sort the address points for determinism.
-  // FIXME: It's more efficient to mangle the types before sorting.
-  llvm::sort(AddressPoints, [this](const AddressPoint &AP1,
-                                   const AddressPoint &AP2) {
-    if (&AP1 == &AP2)
-      return false;
-
-    std::string S1;
-    llvm::raw_string_ostream O1(S1);
-    getCXXABI().getMangleContext().mangleCanonicalTypeName(
-        QualType(AP1.first->getTypeForDecl(), 0), O1);
-    O1.flush();
-
-    std::string S2;
-    llvm::raw_string_ostream O2(S2);
+  for (auto &&AP : VTLayout.getAddressPoints()) {
+    AddressPoint N{AP.first.getBase(),
+                   VTLayout.getVTableOffset(AP.second.VTableIndex) +
+                       AP.second.AddressPointIndex};
+    llvm::raw_string_ostream Stream(N.TypeName);
     getCXXABI().getMangleContext().mangleCanonicalTypeName(
-        QualType(AP2.first->getTypeForDecl(), 0), O2);
-    O2.flush();
-
-    if (S1 < S2)
-      return true;
-    if (S1 != S2)
-      return false;
+        QualType(N.Base->getTypeForDecl(), 0), Stream);
+    AddressPoints.push_back(std::move(N));
+  }
 
-    return AP1.second < AP2.second;
-  });
+  // Sort the address points for determinism.
+  llvm::sort(AddressPoints);
 
   ArrayRef<VTableComponent> Comps = VTLayout.vtable_components();
   for (auto AP : AddressPoints) {
     // Create type metadata for the address point.
-    AddVTableTypeMetadata(VTable, ComponentWidth * AP.second, AP.first);
+    AddVTableTypeMetadata(VTable, ComponentWidth * AP.Offset, AP.Base);
 
     // The class associated with each address point could also potentially be
     // used for indirect calls via a member function pointer, so we need to
@@ -1357,7 +1347,7 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
       llvm::Metadata *MD = CreateMetadataIdentifierForVirtualMemPtrType(
           Context.getMemberPointerType(
               Comps[I].getFunctionDecl()->getType(),
-              Context.getRecordType(AP.first).getTypePtr()));
+              Context.getRecordType(AP.Base).getTypePtr()));
       VTable->addTypeMetadata((ComponentWidth * I).getQuantity(), MD);
     }
   }



More information about the cfe-commits mailing list