[llvm] cd74dd1 - [Demangle][Rust] Parse integer constants
Tomasz Miąsko via llvm-commits
llvm-commits at lists.llvm.org
Fri May 14 10:49:07 PDT 2021
Author: Tomasz Miąsko
Date: 2021-05-14T19:47:19+02:00
New Revision: cd74dd178b98b658a61028df112dd4ca1a552561
URL: https://github.com/llvm/llvm-project/commit/cd74dd178b98b658a61028df112dd4ca1a552561
DIFF: https://github.com/llvm/llvm-project/commit/cd74dd178b98b658a61028df112dd4ca1a552561.diff
LOG: [Demangle][Rust] Parse integer constants
Reviewed By: dblaikie
Differential Revision: https://reviews.llvm.org/D102179
Added:
Modified:
llvm/include/llvm/Demangle/RustDemangle.h
llvm/lib/Demangle/RustDemangle.cpp
llvm/test/Demangle/rust.test
Removed:
################################################################################
diff --git a/llvm/include/llvm/Demangle/RustDemangle.h b/llvm/include/llvm/Demangle/RustDemangle.h
index 9a8c3620250ae..f7fa21328648a 100644
--- a/llvm/include/llvm/Demangle/RustDemangle.h
+++ b/llvm/include/llvm/Demangle/RustDemangle.h
@@ -28,6 +28,30 @@ struct Identifier {
bool empty() const { return Name.empty(); }
};
+enum class BasicType {
+ Bool,
+ Char,
+ I8,
+ I16,
+ I32,
+ I64,
+ I128,
+ ISize,
+ U8,
+ U16,
+ U32,
+ U64,
+ U128,
+ USize,
+ F32,
+ F64,
+ Str,
+ Placeholder,
+ Unit,
+ Variadic,
+ Never,
+};
+
class Demangler {
// Maximum recursion level. Used to avoid stack overflow.
size_t MaxRecursionLevel;
@@ -54,11 +78,14 @@ class Demangler {
void demanglePath();
void demangleGenericArg();
void demangleType();
+ void demangleConst();
+ void demangleConstInt();
Identifier parseIdentifier();
uint64_t parseOptionalBase62Number(char Tag);
uint64_t parseBase62Number();
uint64_t parseDecimalNumber();
+ uint64_t parseHexNumber(StringView &HexDigits);
void print(char C) {
if (Error)
@@ -81,6 +108,8 @@ class Demangler {
Output << N;
}
+ void printBasicType(BasicType);
+
char look() const {
if (Error || Position >= Input.size())
return 0;
diff --git a/llvm/lib/Demangle/RustDemangle.cpp b/llvm/lib/Demangle/RustDemangle.cpp
index 2687978218ffc..1bc9d47f6940c 100644
--- a/llvm/lib/Demangle/RustDemangle.cpp
+++ b/llvm/lib/Demangle/RustDemangle.cpp
@@ -80,6 +80,10 @@ Demangler::Demangler(size_t MaxRecursionLevel)
static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
+static inline bool isHexDigit(const char C) {
+ return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
+}
+
static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
@@ -200,39 +204,13 @@ void Demangler::demanglePath() {
// | "K" <const>
// <lifetime> = "L" <base-62-number>
void Demangler::demangleGenericArg() {
- // FIXME parse remaining productions
- demangleType();
+ if (consumeIf('K'))
+ demangleConst();
+ else
+ demangleType();
+ // FIXME demangle lifetimes.
}
-static const char *const BasicTypes[] = {
- "i8", // a
- "bool", // b
- "char", // c
- "f64", // d
- "str", // e
- "f32", // f
- nullptr, // g
- "u8", // h
- "isize", // i
- "usize", // j
- nullptr, // k
- "i32", // l
- "u32", // m
- "i128", // n
- "u128", // o
- "_", // p
- nullptr, // q
- nullptr, // r
- "i16", // s
- "u16", // t
- "()", // u
- "...", // v
- nullptr, // w
- "i64", // x
- "u64", // y
- "!", // z
-};
-
// <basic-type> = "a" // i8
// | "b" // bool
// | "c" // char
@@ -254,10 +232,142 @@ static const char *const BasicTypes[] = {
// | "y" // u64
// | "z" // !
// | "p" // placeholder (e.g. for generic params), shown as _
-static const char *parseBasicType(char C) {
- if (isLower(C))
- return BasicTypes[C - 'a'];
- return nullptr;
+static bool parseBasicType(char C, BasicType &Type) {
+ switch (C) {
+ case 'a':
+ Type = BasicType::I8;
+ return true;
+ case 'b':
+ Type = BasicType::Bool;
+ return true;
+ case 'c':
+ Type = BasicType::Char;
+ return true;
+ case 'd':
+ Type = BasicType::F64;
+ return true;
+ case 'e':
+ Type = BasicType::Str;
+ return true;
+ case 'f':
+ Type = BasicType::F32;
+ return true;
+ case 'h':
+ Type = BasicType::U8;
+ return true;
+ case 'i':
+ Type = BasicType::ISize;
+ return true;
+ case 'j':
+ Type = BasicType::USize;
+ return true;
+ case 'l':
+ Type = BasicType::I32;
+ return true;
+ case 'm':
+ Type = BasicType::U32;
+ return true;
+ case 'n':
+ Type = BasicType::I128;
+ return true;
+ case 'o':
+ Type = BasicType::U128;
+ return true;
+ case 'p':
+ Type = BasicType::Placeholder;
+ return true;
+ case 's':
+ Type = BasicType::I16;
+ return true;
+ case 't':
+ Type = BasicType::U16;
+ return true;
+ case 'u':
+ Type = BasicType::Unit;
+ return true;
+ case 'v':
+ Type = BasicType::Variadic;
+ return true;
+ case 'x':
+ Type = BasicType::I64;
+ return true;
+ case 'y':
+ Type = BasicType::U64;
+ return true;
+ case 'z':
+ Type = BasicType::Never;
+ return true;
+ default:
+ return false;
+ }
+}
+
+void Demangler::printBasicType(BasicType Type) {
+ switch (Type) {
+ case BasicType::Bool:
+ print("bool");
+ break;
+ case BasicType::Char:
+ print("char");
+ break;
+ case BasicType::I8:
+ print("i8");
+ break;
+ case BasicType::I16:
+ print("i16");
+ break;
+ case BasicType::I32:
+ print("i32");
+ break;
+ case BasicType::I64:
+ print("i64");
+ break;
+ case BasicType::I128:
+ print("i128");
+ break;
+ case BasicType::ISize:
+ print("isize");
+ break;
+ case BasicType::U8:
+ print("u8");
+ break;
+ case BasicType::U16:
+ print("u16");
+ break;
+ case BasicType::U32:
+ print("u32");
+ break;
+ case BasicType::U64:
+ print("u64");
+ break;
+ case BasicType::U128:
+ print("u128");
+ break;
+ case BasicType::USize:
+ print("usize");
+ break;
+ case BasicType::F32:
+ print("f32");
+ break;
+ case BasicType::F64:
+ print("f64");
+ break;
+ case BasicType::Str:
+ print("str");
+ break;
+ case BasicType::Placeholder:
+ print("_");
+ break;
+ case BasicType::Unit:
+ print("()");
+ break;
+ case BasicType::Variadic:
+ print("...");
+ break;
+ case BasicType::Never:
+ print("!");
+ break;
+ }
}
// <type> = | <basic-type>
@@ -273,14 +383,62 @@ static const char *parseBasicType(char C) {
// | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
// | <backref> // backref
void Demangler::demangleType() {
- if (const char *BasicType = parseBasicType(consume())) {
- print(BasicType);
+ BasicType Type;
+ if (parseBasicType(consume(), Type))
+ printBasicType(Type);
+ else
+ Error = true; // FIXME parse remaining productions.
+}
+
+// <const> = <basic-type> <const-data>
+// | "p" // placeholder
+// | <backref>
+void Demangler::demangleConst() {
+ BasicType Type;
+ if (parseBasicType(consume(), Type)) {
+ switch (Type) {
+ case BasicType::I8:
+ case BasicType::I16:
+ case BasicType::I32:
+ case BasicType::I64:
+ case BasicType::I128:
+ case BasicType::ISize:
+ case BasicType::U8:
+ case BasicType::U16:
+ case BasicType::U32:
+ case BasicType::U64:
+ case BasicType::U128:
+ case BasicType::USize:
+ demangleConstInt();
+ break;
+ case BasicType::Placeholder:
+ print('_');
+ break;
+ default:
+ // FIXME demangle backreferences, bool constants, and char constants.
+ Error = true;
+ break;
+ }
} else {
- // FIXME parse remaining productions.
Error = true;
}
}
+// <const-data> = ["n"] <hex-number>
+void Demangler::demangleConstInt() {
+ if (consumeIf('n'))
+ print('-');
+
+ StringView HexDigits;
+ uint64_t Value = parseHexNumber(HexDigits);
+ if (HexDigits.size() <= 16) {
+ printDecimalNumber(Value);
+ } else {
+ print("0x");
+ print(HexDigits);
+ }
+}
+
// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
Identifier Demangler::parseIdentifier() {
bool Punycode = consumeIf('u');
@@ -390,3 +548,43 @@ uint64_t Demangler::parseDecimalNumber() {
return Value;
}
+
+// Parses a hexadecimal number with <0-9a-f> as digits. Returns the parsed
+// value and stores hex digits in HexDigits. The return value is unspecified if
+// HexDigits.size() > 16.
+//
+// <hex-number> = "0_"
+// | <1-9a-f> {<0-9a-f>} "_"
+uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
+ size_t Start = Position;
+ uint64_t Value = 0;
+
+ if (!isHexDigit(look()))
+ Error = true;
+
+ if (consumeIf('0')) {
+ if (!consumeIf('_'))
+ Error = true;
+ } else {
+ while (!Error && !consumeIf('_')) {
+ char C = consume();
+ Value *= 16;
+ if (isDigit(C))
+ Value += C - '0';
+ else if ('a' <= C && C <= 'f')
+ Value += 10 + (C - 'a');
+ else
+ Error = true;
+ }
+ }
+
+ if (Error) {
+ HexDigits = StringView();
+ return 0;
+ }
+
+ size_t End = Position - 1;
+ assert(Start < End);
+ HexDigits = Input.substr(Start, End - Start);
+ return Value;
+}
diff --git a/llvm/test/Demangle/rust.test b/llvm/test/Demangle/rust.test
index 11012491e2026..256d594206ebb 100644
--- a/llvm/test/Demangle/rust.test
+++ b/llvm/test/Demangle/rust.test
@@ -44,6 +44,11 @@ CHECK: generic::<_, _>
CHECK: generic::<_, _, _>
_RIC7genericpppE
+; Generic const arguments
+
+CHECK: generic_const::<_>
+ _RIC13generic_constKpE
+
; Basic types
CHECK: basic::<i8>
@@ -109,6 +114,83 @@ CHECK: basic::<u64>
CHECK: basic::<!>
_RIC5basiczE
+; Integer constants. Test value demangling.
+
+CHECK: integer::<0>
+ _RIC7integerKi0_E
+
+CHECK: integer::<1>
+ _RIC7integerKi1_E
+
+CHECK: integer::<-1>
+ _RIC7integerKin1_E
+
+CHECK: integer::<-15>
+ _RIC7integerKinf_E
+
+CHECK: integer::<-16>
+ _RIC7integerKin10_E
+
+CHECK: integer::<18446744073709551615>
+ _RIC7integerKoffffffffffffffff_E
+
+CHECK: integer::<0x10000000000000000>
+ _RIC7integerKo10000000000000000_E
+
+CHECK: integer::<-0x123456789abcdef01>
+ _RIC7integerKnn123456789abcdef01_E
+
+; Invalid integer constant without any digits:
+
+CHECK: _RIC7integerKi_E
+ _RIC7integerKi_E
+
+; Invalid integer constants with insignificant leading zeros:
+
+CHECK: _RIC7integerKi00_E
+ _RIC7integerKi00_E
+
+CHECK: _RIC7integerKi01_E
+ _RIC7integerKi01_E
+
+; Integer constants. Test all integer types.
+
+CHECK: i8::<0>
+ _RIC2i8Ka0_E
+
+CHECK: u8::<0>
+ _RIC2u8Kh0_E
+
+CHECK: isize::<0>
+ _RIC5isizeKi0_E
+
+CHECK: usize::<0>
+ _RIC5usizeKj0_E
+
+CHECK: i32::<0>
+ _RIC3i32Kl0_E
+
+CHECK: u32::<0>
+ _RIC3u32Km0_E
+
+CHECK: i128::<0>
+ _RIC4i128Kn0_E
+
+CHECK: u128::<0>
+ _RIC4u128Ko0_E
+
+CHECK: i16::<0>
+ _RIC3i16Ks0_E
+
+CHECK: u16::<0>
+ _RIC3u16Kt0_E
+
+CHECK: i64::<0>
+ _RIC3i64Kx0_E
+
+CHECK: u64::<0>
+ _RIC3u64Ky0_E
+
; Invalid mangled characters
CHECK: _RNvC2a.1c
More information about the llvm-commits
mailing list