[llvm] cd74dd1 - [Demangle][Rust] Parse integer constants

Tomasz Miąsko via llvm-commits llvm-commits at lists.llvm.org
Fri May 14 10:49:07 PDT 2021


Author: Tomasz Miąsko
Date: 2021-05-14T19:47:19+02:00
New Revision: cd74dd178b98b658a61028df112dd4ca1a552561

URL: https://github.com/llvm/llvm-project/commit/cd74dd178b98b658a61028df112dd4ca1a552561
DIFF: https://github.com/llvm/llvm-project/commit/cd74dd178b98b658a61028df112dd4ca1a552561.diff

LOG: [Demangle][Rust] Parse integer constants

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D102179

Added: 
    

Modified: 
    llvm/include/llvm/Demangle/RustDemangle.h
    llvm/lib/Demangle/RustDemangle.cpp
    llvm/test/Demangle/rust.test

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Demangle/RustDemangle.h b/llvm/include/llvm/Demangle/RustDemangle.h
index 9a8c3620250ae..f7fa21328648a 100644
--- a/llvm/include/llvm/Demangle/RustDemangle.h
+++ b/llvm/include/llvm/Demangle/RustDemangle.h
@@ -28,6 +28,30 @@ struct Identifier {
   bool empty() const { return Name.empty(); }
 };
 
+enum class BasicType {
+  Bool,
+  Char,
+  I8,
+  I16,
+  I32,
+  I64,
+  I128,
+  ISize,
+  U8,
+  U16,
+  U32,
+  U64,
+  U128,
+  USize,
+  F32,
+  F64,
+  Str,
+  Placeholder,
+  Unit,
+  Variadic,
+  Never,
+};
+
 class Demangler {
   // Maximum recursion level. Used to avoid stack overflow.
   size_t MaxRecursionLevel;
@@ -54,11 +78,14 @@ class Demangler {
   void demanglePath();
   void demangleGenericArg();
   void demangleType();
+  void demangleConst();
+  void demangleConstInt();
 
   Identifier parseIdentifier();
   uint64_t parseOptionalBase62Number(char Tag);
   uint64_t parseBase62Number();
   uint64_t parseDecimalNumber();
+  uint64_t parseHexNumber(StringView &HexDigits);
 
   void print(char C) {
     if (Error)
@@ -81,6 +108,8 @@ class Demangler {
     Output << N;
   }
 
+  void printBasicType(BasicType);
+
   char look() const {
     if (Error || Position >= Input.size())
       return 0;

diff  --git a/llvm/lib/Demangle/RustDemangle.cpp b/llvm/lib/Demangle/RustDemangle.cpp
index 2687978218ffc..1bc9d47f6940c 100644
--- a/llvm/lib/Demangle/RustDemangle.cpp
+++ b/llvm/lib/Demangle/RustDemangle.cpp
@@ -80,6 +80,10 @@ Demangler::Demangler(size_t MaxRecursionLevel)
 
 static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
 
+static inline bool isHexDigit(const char C) {
+  return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
+}
+
 static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
 
 static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
@@ -200,39 +204,13 @@ void Demangler::demanglePath() {
 //               | "K" <const>
 // <lifetime> = "L" <base-62-number>
 void Demangler::demangleGenericArg() {
-  // FIXME parse remaining productions
-  demangleType();
+  if (consumeIf('K'))
+    demangleConst();
+  else
+    demangleType();
+  // FIXME demangle lifetimes.
 }
 
-static const char *const BasicTypes[] = {
-    "i8",    // a
-    "bool",  // b
-    "char",  // c
-    "f64",   // d
-    "str",   // e
-    "f32",   // f
-    nullptr, // g
-    "u8",    // h
-    "isize", // i
-    "usize", // j
-    nullptr, // k
-    "i32",   // l
-    "u32",   // m
-    "i128",  // n
-    "u128",  // o
-    "_",     // p
-    nullptr, // q
-    nullptr, // r
-    "i16",   // s
-    "u16",   // t
-    "()",    // u
-    "...",   // v
-    nullptr, // w
-    "i64",   // x
-    "u64",   // y
-    "!",     // z
-};
-
 // <basic-type> = "a"      // i8
 //              | "b"      // bool
 //              | "c"      // char
@@ -254,10 +232,142 @@ static const char *const BasicTypes[] = {
 //              | "y"      // u64
 //              | "z"      // !
 //              | "p"      // placeholder (e.g. for generic params), shown as _
-static const char *parseBasicType(char C) {
-  if (isLower(C))
-    return BasicTypes[C - 'a'];
-  return nullptr;
+static bool parseBasicType(char C, BasicType &Type) {
+  switch (C) {
+  case 'a':
+    Type = BasicType::I8;
+    return true;
+  case 'b':
+    Type = BasicType::Bool;
+    return true;
+  case 'c':
+    Type = BasicType::Char;
+    return true;
+  case 'd':
+    Type = BasicType::F64;
+    return true;
+  case 'e':
+    Type = BasicType::Str;
+    return true;
+  case 'f':
+    Type = BasicType::F32;
+    return true;
+  case 'h':
+    Type = BasicType::U8;
+    return true;
+  case 'i':
+    Type = BasicType::ISize;
+    return true;
+  case 'j':
+    Type = BasicType::USize;
+    return true;
+  case 'l':
+    Type = BasicType::I32;
+    return true;
+  case 'm':
+    Type = BasicType::U32;
+    return true;
+  case 'n':
+    Type = BasicType::I128;
+    return true;
+  case 'o':
+    Type = BasicType::U128;
+    return true;
+  case 'p':
+    Type = BasicType::Placeholder;
+    return true;
+  case 's':
+    Type = BasicType::I16;
+    return true;
+  case 't':
+    Type = BasicType::U16;
+    return true;
+  case 'u':
+    Type = BasicType::Unit;
+    return true;
+  case 'v':
+    Type = BasicType::Variadic;
+    return true;
+  case 'x':
+    Type = BasicType::I64;
+    return true;
+  case 'y':
+    Type = BasicType::U64;
+    return true;
+  case 'z':
+    Type = BasicType::Never;
+    return true;
+  default:
+    return false;
+  }
+}
+
+void Demangler::printBasicType(BasicType Type) {
+  switch (Type) {
+  case BasicType::Bool:
+    print("bool");
+    break;
+  case BasicType::Char:
+    print("char");
+    break;
+  case BasicType::I8:
+    print("i8");
+    break;
+  case BasicType::I16:
+    print("i16");
+    break;
+  case BasicType::I32:
+    print("i32");
+    break;
+  case BasicType::I64:
+    print("i64");
+    break;
+  case BasicType::I128:
+    print("i128");
+    break;
+  case BasicType::ISize:
+    print("isize");
+    break;
+  case BasicType::U8:
+    print("u8");
+    break;
+  case BasicType::U16:
+    print("u16");
+    break;
+  case BasicType::U32:
+    print("u32");
+    break;
+  case BasicType::U64:
+    print("u64");
+    break;
+  case BasicType::U128:
+    print("u128");
+    break;
+  case BasicType::USize:
+    print("usize");
+    break;
+  case BasicType::F32:
+    print("f32");
+    break;
+  case BasicType::F64:
+    print("f64");
+    break;
+  case BasicType::Str:
+    print("str");
+    break;
+  case BasicType::Placeholder:
+    print("_");
+    break;
+  case BasicType::Unit:
+    print("()");
+    break;
+  case BasicType::Variadic:
+    print("...");
+    break;
+  case BasicType::Never:
+    print("!");
+    break;
+  }
 }
 
 // <type> = | <basic-type>
@@ -273,14 +383,62 @@ static const char *parseBasicType(char C) {
 //          | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
 //          | <backref>                   // backref
 void Demangler::demangleType() {
-  if (const char *BasicType = parseBasicType(consume())) {
-    print(BasicType);
+  BasicType Type;
+  if (parseBasicType(consume(), Type))
+    printBasicType(Type);
+  else
+    Error = true; // FIXME parse remaining productions.
+}
+
+// <const> = <basic-type> <const-data>
+//         | "p"                          // placeholder
+//         | <backref>
+void Demangler::demangleConst() {
+  BasicType Type;
+  if (parseBasicType(consume(), Type)) {
+    switch (Type) {
+    case BasicType::I8:
+    case BasicType::I16:
+    case BasicType::I32:
+    case BasicType::I64:
+    case BasicType::I128:
+    case BasicType::ISize:
+    case BasicType::U8:
+    case BasicType::U16:
+    case BasicType::U32:
+    case BasicType::U64:
+    case BasicType::U128:
+    case BasicType::USize:
+      demangleConstInt();
+      break;
+    case BasicType::Placeholder:
+      print('_');
+      break;
+    default:
+      // FIXME demangle backreferences, bool constants, and char constants.
+      Error = true;
+      break;
+    }
   } else {
-    // FIXME parse remaining productions.
     Error = true;
   }
 }
 
+// <const-data> = ["n"] <hex-number>
+void Demangler::demangleConstInt() {
+  if (consumeIf('n'))
+    print('-');
+
+  StringView HexDigits;
+  uint64_t Value = parseHexNumber(HexDigits);
+  if (HexDigits.size() <= 16) {
+    printDecimalNumber(Value);
+  } else {
+    print("0x");
+    print(HexDigits);
+  }
+}
+
 // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
 Identifier Demangler::parseIdentifier() {
   bool Punycode = consumeIf('u');
@@ -390,3 +548,43 @@ uint64_t Demangler::parseDecimalNumber() {
 
   return Value;
 }
+
+// Parses a hexadecimal number with <0-9a-f> as digits. Returns the parsed
+// value and stores hex digits in HexDigits. The return value is unspecified if
+// HexDigits.size() > 16.
+//
+// <hex-number> = "0_"
+//              | <1-9a-f> {<0-9a-f>} "_"
+uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
+  size_t Start = Position;
+  uint64_t Value = 0;
+
+  if (!isHexDigit(look()))
+    Error = true;
+
+  if (consumeIf('0')) {
+    if (!consumeIf('_'))
+      Error = true;
+  } else {
+    while (!Error && !consumeIf('_')) {
+      char C = consume();
+      Value *= 16;
+      if (isDigit(C))
+        Value += C - '0';
+      else if ('a' <= C && C <= 'f')
+        Value += 10 + (C - 'a');
+      else
+        Error = true;
+    }
+  }
+
+  if (Error) {
+    HexDigits = StringView();
+    return 0;
+  }
+
+  size_t End = Position - 1;
+  assert(Start < End);
+  HexDigits = Input.substr(Start, End - Start);
+  return Value;
+}

diff  --git a/llvm/test/Demangle/rust.test b/llvm/test/Demangle/rust.test
index 11012491e2026..256d594206ebb 100644
--- a/llvm/test/Demangle/rust.test
+++ b/llvm/test/Demangle/rust.test
@@ -44,6 +44,11 @@ CHECK: generic::<_, _>
 CHECK: generic::<_, _, _>
        _RIC7genericpppE
 
+; Generic const arguments
+
+CHECK: generic_const::<_>
+       _RIC13generic_constKpE
+
 ; Basic types
 
 CHECK: basic::<i8>
@@ -109,6 +114,83 @@ CHECK: basic::<u64>
 CHECK: basic::<!>
        _RIC5basiczE
 
+; Integer constants. Test value demangling.
+
+CHECK: integer::<0>
+       _RIC7integerKi0_E
+
+CHECK: integer::<1>
+       _RIC7integerKi1_E
+
+CHECK: integer::<-1>
+       _RIC7integerKin1_E
+
+CHECK: integer::<-15>
+       _RIC7integerKinf_E
+
+CHECK: integer::<-16>
+       _RIC7integerKin10_E
+
+CHECK: integer::<18446744073709551615>
+       _RIC7integerKoffffffffffffffff_E
+
+CHECK: integer::<0x10000000000000000>
+       _RIC7integerKo10000000000000000_E
+
+CHECK: integer::<-0x123456789abcdef01>
+       _RIC7integerKnn123456789abcdef01_E
+
+; Invalid integer constant without any digits:
+
+CHECK: _RIC7integerKi_E
+       _RIC7integerKi_E
+
+; Invalid integer constants with insignificant leading zeros:
+
+CHECK: _RIC7integerKi00_E
+       _RIC7integerKi00_E
+
+CHECK: _RIC7integerKi01_E
+       _RIC7integerKi01_E
+
+; Integer constants. Test all integer types.
+
+CHECK: i8::<0>
+       _RIC2i8Ka0_E
+
+CHECK: u8::<0>
+       _RIC2u8Kh0_E
+
+CHECK: isize::<0>
+       _RIC5isizeKi0_E
+
+CHECK: usize::<0>
+       _RIC5usizeKj0_E
+
+CHECK: i32::<0>
+       _RIC3i32Kl0_E
+
+CHECK: u32::<0>
+       _RIC3u32Km0_E
+
+CHECK: i128::<0>
+       _RIC4i128Kn0_E
+
+CHECK: u128::<0>
+       _RIC4u128Ko0_E
+
+CHECK: i16::<0>
+       _RIC3i16Ks0_E
+
+CHECK: u16::<0>
+       _RIC3u16Kt0_E
+
+CHECK: i64::<0>
+       _RIC3i64Kx0_E
+
+CHECK: u64::<0>
+       _RIC3u64Ky0_E
+
 ; Invalid mangled characters
 
 CHECK: _RNvC2a.1c


        


More information about the llvm-commits mailing list