[llvm] 7310403 - [demangler] Initial support for the new Rust mangling scheme

David Blaikie via llvm-commits llvm-commits at lists.llvm.org
Mon May 3 17:04:50 PDT 2021


Author: Tomasz Miąsko
Date: 2021-05-03T16:44:30-07:00
New Revision: 7310403e3cdf8a436f94770e1a1498db05d2d091

URL: https://github.com/llvm/llvm-project/commit/7310403e3cdf8a436f94770e1a1498db05d2d091
DIFF: https://github.com/llvm/llvm-project/commit/7310403e3cdf8a436f94770e1a1498db05d2d091.diff

LOG: [demangler] Initial support for the new Rust mangling scheme

Add demangling support for a small subset of the new Rust mangling
scheme, with complete support planned as follow-up work.

Integrate Rust demangling into llvm-cxxfilt and use llvm-cxxfilt for
end-to-end testing. The new Rust mangling scheme uses "_R" as a prefix,
which makes it easy to disambiguate it from other mangling schemes.

The public API is modeled after __cxa_demangle / llvm::itaniumDemangle,
since potential candidates for further integration use those.

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D101444

Added: 
    llvm/include/llvm/Demangle/RustDemangle.h
    llvm/lib/Demangle/RustDemangle.cpp
    llvm/test/Demangle/rust.test
    llvm/unittests/Demangle/RustDemangleTest.cpp

Modified: 
    llvm/include/llvm/Demangle/Demangle.h
    llvm/lib/Demangle/CMakeLists.txt
    llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
    llvm/unittests/Demangle/CMakeLists.txt

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h
index b4006a067d10b..c396a1dc5dd3a 100644
--- a/llvm/include/llvm/Demangle/Demangle.h
+++ b/llvm/include/llvm/Demangle/Demangle.h
@@ -57,6 +57,9 @@ char *microsoftDemangle(const char *mangled_name, size_t *n_read,
                         char *buf, size_t *n_buf,
                         int *status, MSDemangleFlags Flags = MSDF_None);
 
+// Demangles a Rust v0 mangled symbol. The API follows that of __cxa_demangle.
+char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status);
+
 /// Attempt to demangle a string using different demangling schemes.
 /// The function uses heuristics to determine which demangling scheme to use.
 /// \param MangledName - reference to string to demangle.

diff --git a/llvm/include/llvm/Demangle/RustDemangle.h b/llvm/include/llvm/Demangle/RustDemangle.h
new file mode 100644
index 0000000000000..e2286f7e5b024
--- /dev/null
+++ b/llvm/include/llvm/Demangle/RustDemangle.h
@@ -0,0 +1,118 @@
+//===--- RustDemangle.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEMANGLE_RUSTDEMANGLE_H
+#define LLVM_DEMANGLE_RUSTDEMANGLE_H
+
+#include "llvm/Demangle/DemangleConfig.h"
+#include "llvm/Demangle/StringView.h"
+#include "llvm/Demangle/Utility.h"
+
+namespace llvm {
+namespace rust_demangle {
+
+using llvm::itanium_demangle::OutputStream;
+using llvm::itanium_demangle::StringView;
+
+struct Identifier {
+  StringView Name;
+  bool Punycode;
+
+  bool empty() const { return Name.empty(); }
+};
+
+class Demangler {
+  // Maximum recursion level. Used to avoid stack overflow.
+  size_t MaxRecursionLevel;
+  // Current recursion level.
+  size_t RecursionLevel;
+
+  // Input string that is being demangled with "_R" prefix removed.
+  StringView Input;
+  // Position in the input string.
+  size_t Position;
+
+  // True if an error occurred.
+  bool Error;
+
+public:
+  // Demangled output.
+  OutputStream Output;
+
+  Demangler(size_t MaxRecursionLevel = 500);
+
+  bool demangle(StringView MangledName);
+
+private:
+  void demanglePath();
+
+  Identifier parseIdentifier();
+  void parseOptionalBase62Number(char Tag);
+  uint64_t parseBase62Number();
+  uint64_t parseDecimalNumber();
+
+  void print(StringView S) {
+    if (Error)
+      return;
+
+    Output += S;
+  }
+
+  char look() const {
+    if (Error || Position >= Input.size())
+      return 0;
+
+    return Input[Position];
+  }
+
+  char consume() {
+    if (Error || Position >= Input.size()) {
+      Error = true;
+      return 0;
+    }
+
+    return Input[Position++];
+  }
+
+  bool consumeIf(char Prefix) {
+    if (Error || Position >= Input.size() || Input[Position] != Prefix)
+      return false;
+
+    Position += 1;
+    return true;
+  }
+
+  /// Computes A + B. When computation wraps around sets the error and returns
+  /// false. Otherwise assigns the result to A and returns true.
+  bool addAssign(uint64_t &A, const uint64_t B) {
+    if (A > std::numeric_limits<uint64_t>::max() - B) {
+      Error = true;
+      return false;
+    }
+
+    A += B;
+    return true;
+  }
+
+  /// Computes A * B. When computation wraps around sets the error and returns
+  /// false. Otherwise assigns the result to A and returns true.
+  bool mulAssign(uint64_t &A, const uint64_t B) {
+    if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
+      Error = true;
+      return false;
+    }
+
+    A *= B;
+    return true;
+  }
+};
+
+} // namespace rust_demangle
+} // namespace llvm
+
+#endif

diff --git a/llvm/lib/Demangle/CMakeLists.txt b/llvm/lib/Demangle/CMakeLists.txt
index 1368d911f028a..86e2d49dddf2b 100644
--- a/llvm/lib/Demangle/CMakeLists.txt
+++ b/llvm/lib/Demangle/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_component_library(LLVMDemangle
   ItaniumDemangle.cpp
   MicrosoftDemangle.cpp
   MicrosoftDemangleNodes.cpp
+  RustDemangle.cpp
 
   ADDITIONAL_HEADER_DIRS
   "${LLVM_MAIN_INCLUDE_DIR}/llvm/Demangle"

diff --git a/llvm/lib/Demangle/RustDemangle.cpp b/llvm/lib/Demangle/RustDemangle.cpp
new file mode 100644
index 0000000000000..d196d66bd78ee
--- /dev/null
+++ b/llvm/lib/Demangle/RustDemangle.cpp
@@ -0,0 +1,276 @@
+//===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a demangler for Rust v0 mangled symbols as specified in
+// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/RustDemangle.h"
+#include "llvm/Demangle/Demangle.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <limits>
+
+using namespace llvm;
+using namespace rust_demangle;
+
+char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
+                         int *Status) {
+  if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
+    if (Status != nullptr)
+      *Status = demangle_invalid_args;
+    return nullptr;
+  }
+
+  // Return early if mangled name doesn't look like a Rust symbol.
+  StringView Mangled(MangledName);
+  if (!Mangled.startsWith("_R")) {
+    if (Status != nullptr)
+      *Status = demangle_invalid_mangled_name;
+    return nullptr;
+  }
+
+  Demangler D;
+  if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
+    if (Status != nullptr)
+      *Status = demangle_memory_alloc_failure;
+    return nullptr;
+  }
+
+  if (!D.demangle(Mangled)) {
+    if (Status != nullptr)
+      *Status = demangle_invalid_mangled_name;
+    std::free(D.Output.getBuffer());
+    return nullptr;
+  }
+
+  D.Output += '\0';
+  char *Demangled = D.Output.getBuffer();
+  size_t DemangledLen = D.Output.getCurrentPosition();
+
+  if (Buf != nullptr) {
+    if (DemangledLen <= *N) {
+      std::memcpy(Buf, Demangled, DemangledLen);
+      std::free(Demangled);
+      Demangled = Buf;
+    } else {
+      std::free(Buf);
+    }
+  }
+
+  if (N != nullptr)
+    *N = DemangledLen;
+
+  if (Status != nullptr)
+    *Status = demangle_success;
+
+  return Demangled;
+}
+
+Demangler::Demangler(size_t MaxRecursionLevel)
+    : MaxRecursionLevel(MaxRecursionLevel) {}
+
+static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
+
+static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
+
+static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
+
+/// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
+static inline bool isValid(const char C) {
+  return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
+}
+
+// Demangles Rust v0 mangled symbol. Returns true when successful, and false
+// otherwise. The demangled symbol is stored in the Output field. It is the
+// responsibility of the caller to free the memory behind the output stream.
+//
+// <symbol-name> = "_R" <path> [<instantiating-crate>]
+bool Demangler::demangle(StringView Mangled) {
+  Position = 0;
+  Error = false;
+  RecursionLevel = 0;
+
+  if (!Mangled.consumeFront("_R")) {
+    Error = true;
+    return false;
+  }
+  Input = Mangled;
+
+  demanglePath();
+
+  // FIXME parse optional <instantiating-crate>.
+
+  if (Position != Input.size())
+    Error = true;
+
+  return !Error;
+}
+
+// <path> = "C" <identifier>               // crate root
+//        | "M" <impl-path> <type>         // <T> (inherent impl)
+//        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
+//        | "Y" <type> <path>              // <T as Trait> (trait definition)
+//        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
+//        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
+//        | <backref>
+// <identifier> = [<disambiguator>] <undisambiguated-identifier>
+// <ns> = "C"      // closure
+//      | "S"      // shim
+//      | <A-Z>    // other special namespaces
+//      | <a-z>    // internal namespaces
+void Demangler::demanglePath() {
+  if (Error || RecursionLevel >= MaxRecursionLevel) {
+    Error = true;
+    return;
+  }
+  RecursionLevel += 1;
+
+  switch (consume()) {
+  case 'C': {
+    parseOptionalBase62Number('s');
+    Identifier Ident = parseIdentifier();
+    print(Ident.Name);
+    break;
+  }
+  case 'N': {
+    char NS = consume();
+    if (!isLower(NS) && !isUpper(NS)) {
+      Error = true;
+      break;
+    }
+    demanglePath();
+
+    parseOptionalBase62Number('s');
+    Identifier Ident = parseIdentifier();
+
+    if (!Ident.empty()) {
+      // FIXME print special namespaces:
+      // * "C" closures
+      // * "S" shim
+      print("::");
+      print(Ident.Name);
+    }
+    break;
+  }
+  default:
+    // FIXME parse remaining productions.
+    Error = true;
+    break;
+  }
+
+  RecursionLevel -= 1;
+}
+
+// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
+Identifier Demangler::parseIdentifier() {
+  bool Punycode = consumeIf('u');
+  uint64_t Bytes = parseDecimalNumber();
+
+  // Underscore resolves the ambiguity when identifier starts with a decimal
+  // digit or another underscore.
+  consumeIf('_');
+
+  if (Error || Bytes > Input.size() - Position) {
+    Error = true;
+    return {};
+  }
+  StringView S = Input.substr(Position, Bytes);
+  Position += Bytes;
+
+  if (!std::all_of(S.begin(), S.end(), isValid)) {
+    Error = true;
+    return {};
+  }
+
+  return {S, Punycode};
+}
+
+// Parses optional base 62 number. The presence of a number is determined using
+// Tag.
+void Demangler::parseOptionalBase62Number(char Tag) {
+  // Parsing result is currently unused.
+  if (consumeIf(Tag))
+    parseBase62Number();
+}
+
+// Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
+// "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
+// "1_" encodes 2, etc.
+//
+// <base-62-number> = {<0-9a-zA-Z>} "_"
+uint64_t Demangler::parseBase62Number() {
+  if (consumeIf('_'))
+    return 0;
+
+  uint64_t Value = 0;
+
+  while (true) {
+    uint64_t Digit;
+    char C = consume();
+
+    if (C == '_') {
+      break;
+    } else if (isDigit(C)) {
+      Digit = C - '0';
+    } else if (isLower(C)) {
+      Digit = 10 + (C - 'a');
+    } else if (isUpper(C)) {
+      Digit = 10 + 26 + (C - 'A');
+    } else {
+      Error = true;
+      return 0;
+    }
+
+    if (!mulAssign(Value, 62))
+      return 0;
+
+    if (!addAssign(Value, Digit))
+      return 0;
+  }
+
+  if (!addAssign(Value, 1))
+    return 0;
+
+  return Value;
+}
+
+// Parses a decimal number that had been encoded without any leading zeros.
+//
+// <decimal-number> = "0"
+//                  | <1-9> {<0-9>}
+uint64_t Demangler::parseDecimalNumber() {
+  char C = look();
+  if (!isDigit(C)) {
+    Error = true;
+    return 0;
+  }
+
+  if (C == '0') {
+    consume();
+    return 0;
+  }
+
+  uint64_t Value = 0;
+
+  while (isDigit(look())) {
+    if (!mulAssign(Value, 10)) {
+      Error = true;
+      return 0;
+    }
+
+    uint64_t D = consume() - '0';
+    if (!addAssign(Value, D))
+      return 0;
+  }
+
+  return Value;
+}

diff --git a/llvm/test/Demangle/rust.test b/llvm/test/Demangle/rust.test
new file mode 100644
index 0000000000000..1be0692c63618
--- /dev/null
+++ b/llvm/test/Demangle/rust.test
@@ -0,0 +1,43 @@
+RUN: llvm-cxxfilt -n  < %s | FileCheck --match-full-lines %s
+
+CHECK: a::main
+       _RNvC1a4main
+
+CHECK: hello::rust
+       _RNvCshGpAVYOtgW1_5hello4rust
+
+CHECK: a::b::c
+       _RNvNvC1a1b1c
+
+; Invalid mangled characters
+
+CHECK: _RNvC2a.1c
+       _RNvC2a.1c
+
+CHECK: _RNvC2a$1c
+       _RNvC2a$1c
+
+; Invalid identifier length (UINT64_MAX + 3, which happens to be ok after a wraparound).
+
+CHECK: _RNvC2ab18446744073709551618xy
+       _RNvC2ab18446744073709551618xy
+
+; Mangling scheme includes an optional encoding version. When present it would
+; indicate an encoding we don't support yet. Check that it is rejected:
+
+CHECK: _R0NvC1a4main
+       _R0NvC1a4main
+
+; Early EOF
+
+CHECK: _RNv
+       _RNv
+
+CHECK: _RNvC
+       _RNvC
+
+CHECK: _RNvC1a5main
+       _RNvC1a5main
+
+CHECK: _RNvC1a20abc
+       _RNvC1a20abc

diff --git a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
index 93d6322a167ee..ac569abb93f0f 100644
--- a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
+++ b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
@@ -97,6 +97,11 @@ static std::string demangle(const std::string &Mangled) {
     Undecorated = itaniumDemangle(DecoratedStr + 6, nullptr, nullptr, &Status);
   }
 
+  if (!Undecorated &&
+      (DecoratedLength >= 2 && strncmp(DecoratedStr, "_R", 2) == 0)) {
+    Undecorated = rustDemangle(DecoratedStr, nullptr, nullptr, &Status);
+  }
+
   std::string Result(Undecorated ? Prefix + Undecorated : Mangled);
   free(Undecorated);
   return Result;

diff --git a/llvm/unittests/Demangle/CMakeLists.txt b/llvm/unittests/Demangle/CMakeLists.txt
index 8db2595f39c95..4bcc9bb322ea9 100644
--- a/llvm/unittests/Demangle/CMakeLists.txt
+++ b/llvm/unittests/Demangle/CMakeLists.txt
@@ -7,5 +7,6 @@ add_llvm_unittest(DemangleTests
   DemangleTest.cpp
   ItaniumDemangleTest.cpp
   PartialDemangleTest.cpp
+  RustDemangleTest.cpp
   StringViewTest.cpp
 )

diff --git a/llvm/unittests/Demangle/RustDemangleTest.cpp b/llvm/unittests/Demangle/RustDemangleTest.cpp
new file mode 100644
index 0000000000000..670e48ed216fa
--- /dev/null
+++ b/llvm/unittests/Demangle/RustDemangleTest.cpp
@@ -0,0 +1,90 @@
+//===------------------ RustDemangleTest.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/Demangle.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <cstdlib>
+
+TEST(RustDemangle, Success) {
+  char *Demangled =
+      llvm::rustDemangle("_RNvC1a4main", nullptr, nullptr, nullptr);
+  EXPECT_STREQ(Demangled, "a::main");
+  std::free(Demangled);
+
+  // With status.
+  int Status = 0;
+  Demangled = llvm::rustDemangle("_RNvC1a4main", nullptr, nullptr, &Status);
+  EXPECT_EQ(Status, llvm::demangle_success);
+  EXPECT_STREQ(Demangled, "a::main");
+  std::free(Demangled);
+
+  // With status and length.
+  size_t N = 0;
+  Demangled = llvm::rustDemangle("_RNvC1a4main", nullptr, &N, &Status);
+  EXPECT_EQ(Status, llvm::demangle_success);
+  EXPECT_EQ(N, 8u);
+  EXPECT_STREQ(Demangled, "a::main");
+  std::free(Demangled);
+}
+
+TEST(RustDemangle, Invalid) {
+  int Status = 0;
+  char *Demangled = nullptr;
+
+  // Invalid prefix.
+  Demangled = llvm::rustDemangle("_ABCDEF", nullptr, nullptr, &Status);
+  EXPECT_EQ(Status, llvm::demangle_invalid_mangled_name);
+  EXPECT_EQ(Demangled, nullptr);
+
+  // Correct prefix but still invalid.
+  Demangled = llvm::rustDemangle("_RRR", nullptr, nullptr, &Status);
+  EXPECT_EQ(Status, llvm::demangle_invalid_mangled_name);
+  EXPECT_EQ(Demangled, nullptr);
+}
+
+TEST(RustDemangle, OutputBufferWithoutLength) {
+  char *Buffer = static_cast<char *>(std::malloc(1024));
+  ASSERT_NE(Buffer, nullptr);
+
+  int Status = 0;
+  char *Demangled =
+      llvm::rustDemangle("_RNvC1a4main", Buffer, nullptr, &Status);
+
+  EXPECT_EQ(Status, llvm::demangle_invalid_args);
+  EXPECT_EQ(Demangled, nullptr);
+  std::free(Buffer);
+}
+
+TEST(RustDemangle, OutputBuffer) {
+  size_t N = 1024;
+  char *Buffer = static_cast<char *>(std::malloc(N));
+  ASSERT_NE(Buffer, nullptr);
+
+  int Status = 0;
+  char *Demangled = llvm::rustDemangle("_RNvC1a4main", Buffer, &N, &Status);
+
+  EXPECT_EQ(Status, llvm::demangle_success);
+  EXPECT_EQ(Demangled, Buffer);
+  EXPECT_STREQ(Demangled, "a::main");
+  std::free(Demangled);
+}
+
+TEST(RustDemangle, SmallOutputBuffer) {
+  size_t N = 1;
+  char *Buffer = static_cast<char *>(std::malloc(N));
+  ASSERT_NE(Buffer, nullptr);
+
+  int Status = 0;
+  char *Demangled = llvm::rustDemangle("_RNvC1a4main", Buffer, &N, &Status);
+
+  EXPECT_EQ(Status, llvm::demangle_success);
+  EXPECT_STREQ(Demangled, "a::main");
+  std::free(Demangled);
+}


        


More information about the llvm-commits mailing list