[llvm] 4f63a60 - [AArch64] Fix Arm64EC mangling with C++ symbols using MD5 mangling. (#160963)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 6 15:42:53 PDT 2025


Author: Eli Friedman
Date: 2025-10-06T15:42:50-07:00
New Revision: 4f63a60e5c99cdb866bae1300447e9c0c466aebc

URL: https://github.com/llvm/llvm-project/commit/4f63a60e5c99cdb866bae1300447e9c0c466aebc
DIFF: https://github.com/llvm/llvm-project/commit/4f63a60e5c99cdb866bae1300447e9c0c466aebc.diff

LOG: [AArch64] Fix Arm64EC mangling with C++ symbols using MD5 mangling. (#160963)

In addition to the usual mangling formats, MSVC uses a special mangling
format for very long symbols: instead of actually emitting the mangled
name, it hashes the name, and uses the hash as the symbol. This doesn't
match any of the usual forms, so it needs special handling.

Fixes a crash/link error when using such symbols.

(Unrelated to this patch, there's something weird about the way MSVC
handles these symbols in Arm64EC mode: it looks like MSVC is computing a
different hash compared to x64 MSVC, and clang. I think this is a bug.
Noting this here in case someone wants to compare MSVC vs. clang
output.)

Added: 
    

Modified: 
    llvm/lib/IR/Mangler.cpp
    llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
    llvm/unittests/IR/ManglerTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp
index ca6a4804087ac..55c825d272a2f 100644
--- a/llvm/lib/IR/Mangler.cpp
+++ b/llvm/lib/IR/Mangler.cpp
@@ -307,6 +307,19 @@ std::optional<std::string> llvm::getArm64ECMangledFunctionName(StringRef Name) {
   if (Name.contains("$$h"))
     return std::nullopt;
 
+  // Handle MD5 mangled names, which use a slightly different rule from
+  // other C++ manglings.
+  //
+  // A non-Arm64EC function:
+  //
+  // ??@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa@
+  //
+  // An Arm64EC function:
+  //
+  // ??@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa@$$h@
+  if (Name.starts_with("??@") && Name.ends_with("@"))
+    return (Name + "$$h@").str();
+
   // Ask the demangler where we should insert "$$h".
   auto InsertIdx = getArm64ECInsertionPointInMangledName(Name);
   if (!InsertIdx)
@@ -324,6 +337,10 @@ llvm::getArm64ECDemangledFunctionName(StringRef Name) {
   if (Name[0] != '?')
     return std::nullopt;
 
+  // MD5 mangled name; see comment in getArm64ECMangledFunctionName.
+  if (Name.starts_with("??@") && Name.ends_with("@$$h@"))
+    return Name.drop_back(4).str();
+
   // Drop the ARM64EC "$$h" tag.
   std::pair<StringRef, StringRef> Pair = Name.split("$$h");
   if (Pair.second.empty())

diff --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
index f829227a47cd7..dc352244deeef 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
@@ -563,6 +563,41 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
 ; CHECK-NEXT:     .seh_endfunclet
 ; CHECK-NEXT:     .seh_endproc
 
+declare void @"??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"()
+; CHECK-LABEL:       .def    "??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@";
+; CHECK-NEXT:        .scl    2;
+; CHECK-NEXT:        .type   32;
+; CHECK-NEXT:        .endef
+; CHECK-NEXT:        .section        .wowthk$aa,"xr",discard,"??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT:        .globl  "??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@" // -- Begin function ??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@
+; CHECK-NEXT:        .p2align        2
+; CHECK-NEXT: "??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@": // @"??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT:         .weak_anti_dep  "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"
+; CHECK-NEXT: "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@" = "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT:         .weak_anti_dep  "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT: "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@" = "??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT: .seh_proc "??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT:         str     x30, [sp, #-16]!                // 8-byte Folded Spill
+; CHECK-NEXT:         .seh_save_reg_x x30, 16
+; CHECK-NEXT:         .seh_endprologue
+; CHECK-NEXT:         adrp    x8, __os_arm64x_check_icall
+; CHECK-NEXT:         adrp    x11, "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"
+; CHECK-NEXT:         add     x11, x11, :lo12:"??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"
+; CHECK-NEXT:         ldr     x8, [x8, :lo12:__os_arm64x_check_icall]
+; CHECK-NEXT:         adrp    x10, $iexit_thunk$cdecl$v$v
+; CHECK-NEXT:         add     x10, x10, :lo12:$iexit_thunk$cdecl$v$v
+; CHECK-NEXT:         blr     x8
+; CHECK-NEXT:         .seh_startepilogue
+; CHECK-NEXT:         ldr     x30, [sp], #16                  // 8-byte Folded Reload
+; CHECK-NEXT:         .seh_save_reg_x x30, 16
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         br      x11
+; CHECK-NEXT:         .seh_endfunclet
+; CHECK-NEXT:         .seh_endproc
+
+
+
 ; CHECK-LABEL:    .section        .hybmp$x,"yi"
 ; CHECK-NEXT:     .symidx "#func_caller"
 ; CHECK-NEXT:     .symidx $ientry_thunk$cdecl$v$v
@@ -633,6 +668,12 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
 ; CHECK-NEXT:     .symidx	"#large_vector$exit_thunk"
 ; CHECK-NEXT:     .symidx	large_vector
 ; CHECK-NEXT:     .word	0
+; CHECK-NEXT:     .symidx "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"
+; CHECK-NEXT:     .symidx $iexit_thunk$cdecl$v$v
+; CHECK-NEXT:     .word   4
+; CHECK-NEXT:     .symidx "??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT:     .symidx "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"
+; CHECK-NEXT:     .word   0
 
 define void @func_caller() nounwind {
   call void @no_op()
@@ -649,5 +690,6 @@ define void @func_caller() nounwind {
   call %T2 @simple_struct(%T1 { i16 0 }, %T2 { i32 0, float 0.0 }, %T3 { i64 0, double 0.0 }, %T4 { i64 0, double 0.0, i8 0 })
   call <4 x i8> @small_vector(<4 x i8> <i8 0, i8 0, i8 0, i8 0>)
   call <8 x i16> @large_vector(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+  call void @"??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"()
   ret void
 }

diff --git a/llvm/unittests/IR/ManglerTest.cpp b/llvm/unittests/IR/ManglerTest.cpp
index bced6ff10b45d..bb0b3edd966e8 100644
--- a/llvm/unittests/IR/ManglerTest.cpp
+++ b/llvm/unittests/IR/ManglerTest.cpp
@@ -243,6 +243,9 @@ TEST(ManglerTest, Arm64EC) {
       // public: int __cdecl Wrapper<struct A>::GetValue(struct WW<struct
       // A>::Z)const
       "?GetValue@?$Wrapper@UA@@@@$$hQEBAHUZ@?$WW@UA@@@@@Z",
+
+      // MD5 symbol
+      "??@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa@$$h@",
   };
 
   for (const auto &Arm64ECName : Arm64ECNames) {


        


More information about the llvm-commits mailing list