[clang] f33c2c2 - Fix crash on `user defined literals`

Eduardo Caldas via cfe-commits cfe-commits at lists.llvm.org
Fri Jul 10 09:21:23 PDT 2020


Author: Eduardo Caldas
Date: 2020-07-10T16:21:11Z
New Revision: f33c2c27a8d4ea831aa7c2c2649066be91318d85

URL: https://github.com/llvm/llvm-project/commit/f33c2c27a8d4ea831aa7c2c2649066be91318d85
DIFF: https://github.com/llvm/llvm-project/commit/f33c2c27a8d4ea831aa7c2c2649066be91318d85.diff

LOG: Fix crash on `user defined literals`

Summary:
Given an UserDefinedLiteral `1.2_w`:
Problem: Lexer generates one Token for the literal, but ClangAST
references two source locations
Fix: Ignore the operator and interpret it as the underlying literal.
e.g.: `1.2_w` token generates syntax node IntegerLiteral(1.2_w)

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D82157

Added: 
    

Modified: 
    clang/include/clang/Tooling/Syntax/Nodes.h
    clang/lib/Tooling/Syntax/BuildTree.cpp
    clang/lib/Tooling/Syntax/Nodes.cpp
    clang/unittests/Tooling/Syntax/TreeTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h
index 97605ceb76b7..fb63c36bc4cc 100644
--- a/clang/include/clang/Tooling/Syntax/Nodes.h
+++ b/clang/include/clang/Tooling/Syntax/Nodes.h
@@ -50,6 +50,11 @@ enum class NodeKind : uint16_t {
   StringLiteralExpression,
   BoolLiteralExpression,
   CxxNullPtrExpression,
+  UnknownUserDefinedLiteralExpression,
+  IntegerUserDefinedLiteralExpression,
+  FloatUserDefinedLiteralExpression,
+  CharUserDefinedLiteralExpression,
+  StringUserDefinedLiteralExpression,
   IdExpression,
 
   // Statements.
@@ -325,6 +330,88 @@ class CxxNullPtrExpression final : public Expression {
   syntax::Leaf *nullPtrKeyword();
 };
 
+/// Expression for user-defined literal. C++ [lex.ext]
+/// user-defined-literal:
+///   user-defined-integer-literal
+///   user-defined-floating-point-literal
+///   user-defined-string-literal
+///   user-defined-character-literal
+class UserDefinedLiteralExpression : public Expression {
+public:
+  UserDefinedLiteralExpression(NodeKind K) : Expression(K) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::UnknownUserDefinedLiteralExpression ||
+           N->kind() == NodeKind::IntegerUserDefinedLiteralExpression ||
+           N->kind() == NodeKind::FloatUserDefinedLiteralExpression ||
+           N->kind() == NodeKind::CharUserDefinedLiteralExpression ||
+           N->kind() == NodeKind::StringUserDefinedLiteralExpression;
+  }
+  syntax::Leaf *literalToken();
+};
+
+// We cannot yet distinguish between user-defined-integer-literal and
+// user-defined-floating-point-literal, when using raw literal operator or
+// numeric literal operator. C++ [lex.ext]p3, p4
+/// Expression for an unknown user-defined-literal.
+class UnknownUserDefinedLiteralExpression final
+    : public UserDefinedLiteralExpression {
+public:
+  UnknownUserDefinedLiteralExpression()
+      : UserDefinedLiteralExpression(
+            NodeKind::UnknownUserDefinedLiteralExpression) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::UnknownUserDefinedLiteralExpression;
+  }
+};
+
+/// Expression for user-defined-integer-literal. C++ [lex.ext]
+class IntegerUserDefinedLiteralExpression final
+    : public UserDefinedLiteralExpression {
+public:
+  IntegerUserDefinedLiteralExpression()
+      : UserDefinedLiteralExpression(
+            NodeKind::IntegerUserDefinedLiteralExpression) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::IntegerUserDefinedLiteralExpression;
+  }
+};
+
+/// Expression for user-defined-floating-point-literal. C++ [lex.ext]
+class FloatUserDefinedLiteralExpression final
+    : public UserDefinedLiteralExpression {
+public:
+  FloatUserDefinedLiteralExpression()
+      : UserDefinedLiteralExpression(
+            NodeKind::FloatUserDefinedLiteralExpression) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::FloatUserDefinedLiteralExpression;
+  }
+};
+
+/// Expression for user-defined-character-literal. C++ [lex.ext]
+class CharUserDefinedLiteralExpression final
+    : public UserDefinedLiteralExpression {
+public:
+  CharUserDefinedLiteralExpression()
+      : UserDefinedLiteralExpression(
+            NodeKind::CharUserDefinedLiteralExpression) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::CharUserDefinedLiteralExpression;
+  }
+};
+
+/// Expression for user-defined-string-literal. C++ [lex.ext]
+class StringUserDefinedLiteralExpression final
+    : public UserDefinedLiteralExpression {
+public:
+  StringUserDefinedLiteralExpression()
+      : UserDefinedLiteralExpression(
+            NodeKind::StringUserDefinedLiteralExpression) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::StringUserDefinedLiteralExpression;
+  }
+};
+
 /// An abstract class for prefix and postfix unary operators.
 class UnaryOperatorExpression : public Expression {
 public:

diff  --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index f9fdf47bff26..8204d3fc66f3 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -216,7 +216,8 @@ static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T,
   }
   if (Initializer.isValid()) {
     auto InitializerEnd = Initializer.getEnd();
-    assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || End == InitializerEnd);
+    assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) ||
+           End == InitializerEnd);
     End = InitializerEnd;
   }
   return SourceRange(Start, End);
@@ -708,6 +709,42 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
     return NNS;
   }
 
+  bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) {
+    // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node
+    // referencing the location of the UDL suffix (`_w` in `1.2_w`). The
+    // UDL suffix location does not point to the beginning of a token, so we
+    // can't represent the UDL suffix as a separate syntax tree node.
+
+    return WalkUpFromUserDefinedLiteral(S);
+  }
+
+  syntax::NodeKind getUserDefinedLiteralKind(UserDefinedLiteral *S) {
+    switch (S->getLiteralOperatorKind()) {
+    case clang::UserDefinedLiteral::LOK_Integer:
+      return syntax::NodeKind::IntegerUserDefinedLiteralExpression;
+    case clang::UserDefinedLiteral::LOK_Floating:
+      return syntax::NodeKind::FloatUserDefinedLiteralExpression;
+    case clang::UserDefinedLiteral::LOK_Character:
+      return syntax::NodeKind::CharUserDefinedLiteralExpression;
+    case clang::UserDefinedLiteral::LOK_String:
+      return syntax::NodeKind::StringUserDefinedLiteralExpression;
+    case clang::UserDefinedLiteral::LOK_Raw:
+    case clang::UserDefinedLiteral::LOK_Template:
+      // FIXME: Apply `NumericLiteralParser` to the underlying token to deduce
+      // the right UDL kind. That would require a `Preprocessor` though.
+      return syntax::NodeKind::UnknownUserDefinedLiteralExpression;
+    }
+  }
+
+  bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) {
+    Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken);
+    Builder.foldNode(Builder.getExprRange(S),
+                     new (allocator()) syntax::UserDefinedLiteralExpression(
+                         getUserDefinedLiteralKind(S)),
+                     S);
+    return true;
+  }
+
   bool WalkUpFromDeclRefExpr(DeclRefExpr *S) {
     if (auto *NNS = BuildNestedNameSpecifier(S->getQualifierLoc()))
       Builder.markChild(NNS, syntax::NodeRole::IdExpression_qualifier);
@@ -817,9 +854,9 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
   bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) {
     if (getOperatorNodeKind(*S) ==
         syntax::NodeKind::PostfixUnaryOperatorExpression) {
-      // A postfix unary operator is declared as taking two operands. The second
-      // operand is used to distinguish from its prefix counterpart. In the
-      // semantic AST this "phantom" operand is represented as a
+      // A postfix unary operator is declared as taking two operands. The
+      // second operand is used to distinguish from its prefix counterpart. In
+      // the semantic AST this "phantom" operand is represented as a
       // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this
       // operand because it does not correspond to anything written in source
       // code

diff  --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp
index 3d9b943d6db1..e1aa2521a2a9 100644
--- a/clang/lib/Tooling/Syntax/Nodes.cpp
+++ b/clang/lib/Tooling/Syntax/Nodes.cpp
@@ -32,6 +32,16 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) {
     return OS << "BoolLiteralExpression";
   case NodeKind::CxxNullPtrExpression:
     return OS << "CxxNullPtrExpression";
+  case NodeKind::UnknownUserDefinedLiteralExpression:
+    return OS << "UnknownUserDefinedLiteralExpression";
+  case NodeKind::IntegerUserDefinedLiteralExpression:
+    return OS << "IntegerUserDefinedLiteralExpression";
+  case NodeKind::FloatUserDefinedLiteralExpression:
+    return OS << "FloatUserDefinedLiteralExpression";
+  case NodeKind::CharUserDefinedLiteralExpression:
+    return OS << "CharUserDefinedLiteralExpression";
+  case NodeKind::StringUserDefinedLiteralExpression:
+    return OS << "StringUserDefinedLiteralExpression";
   case NodeKind::PrefixUnaryOperatorExpression:
     return OS << "PrefixUnaryOperatorExpression";
   case NodeKind::PostfixUnaryOperatorExpression:
@@ -252,6 +262,11 @@ syntax::Leaf *syntax::CxxNullPtrExpression::nullPtrKeyword() {
       findChild(syntax::NodeRole::LiteralToken));
 }
 
+syntax::Leaf *syntax::UserDefinedLiteralExpression::literalToken() {
+  return llvm::cast_or_null<syntax::Leaf>(
+      findChild(syntax::NodeRole::LiteralToken));
+}
+
 syntax::Expression *syntax::BinaryOperatorExpression::lhs() {
   return llvm::cast_or_null<syntax::Expression>(
       findChild(syntax::NodeRole::BinaryOperatorExpression_leftHandSide));

diff  --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp
index acd0fbf2b52e..91e7a8f33e4e 100644
--- a/clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -1184,20 +1184,108 @@ void test() {
 )txt"));
 }
 
-TEST_P(SyntaxTreeTest, IntegerLiteral) {
+TEST_P(SyntaxTreeTest, UserDefinedLiteral) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
   EXPECT_TRUE(treeDumpEqual(
       R"cpp(
+unsigned operator "" _i(unsigned long long);
+unsigned operator "" _f(long double);
+unsigned operator "" _c(char);
+
+unsigned operator "" _r(const char*); // raw-literal operator
+
+template <char...>
+unsigned operator "" _t();            // numeric literal operator template
+
 void test() {
-  12;
-  12u;
-  12l;
-  12ul;
-  014;
-  0XC;
+  12_i;  // call: operator "" _i(12uLL)      | kind: integer
+  1.2_f; // call: operator "" _f(1.2L)       | kind: float
+  '2'_c; // call: operator "" _c('2')        | kind: char
+
+  // TODO: Generate `FloatUserDefinedLiteralExpression` and
+  // `IntegerUserDefinedLiteralExpression` instead of
+  // `UnknownUserDefinedLiteralExpression`. See `getUserDefinedLiteralKind`
+  12_r;  // call: operator "" _r("12")       | kind: integer
+  1.2_r; // call: operator "" _i("1.2")      | kind: float
+  12_t;  // call: operator<'1', '2'> "" _x() | kind: integer
+  1.2_t; // call: operator<'1', '2'> "" _x() | kind: float
 }
-)cpp",
+    )cpp",
       R"txt(
 *: TranslationUnit
+|-SimpleDeclaration
+| |-unsigned
+| |-SimpleDeclarator
+| | |-operator
+| | |-""
+| | |-_i
+| | `-ParametersAndQualifiers
+| |   |-(
+| |   |-SimpleDeclaration
+| |   | |-unsigned
+| |   | |-long
+| |   | `-long
+| |   `-)
+| `-;
+|-SimpleDeclaration
+| |-unsigned
+| |-SimpleDeclarator
+| | |-operator
+| | |-""
+| | |-_f
+| | `-ParametersAndQualifiers
+| |   |-(
+| |   |-SimpleDeclaration
+| |   | |-long
+| |   | `-double
+| |   `-)
+| `-;
+|-SimpleDeclaration
+| |-unsigned
+| |-SimpleDeclarator
+| | |-operator
+| | |-""
+| | |-_c
+| | `-ParametersAndQualifiers
+| |   |-(
+| |   |-SimpleDeclaration
+| |   | `-char
+| |   `-)
+| `-;
+|-SimpleDeclaration
+| |-unsigned
+| |-SimpleDeclarator
+| | |-operator
+| | |-""
+| | |-_r
+| | `-ParametersAndQualifiers
+| |   |-(
+| |   |-SimpleDeclaration
+| |   | |-const
+| |   | |-char
+| |   | `-SimpleDeclarator
+| |   |   `-*
+| |   `-)
+| `-;
+|-TemplateDeclaration
+| |-template
+| |-<
+| |-SimpleDeclaration
+| | `-char
+| |-...
+| |->
+| `-SimpleDeclaration
+|   |-unsigned
+|   |-SimpleDeclarator
+|   | |-operator
+|   | |-""
+|   | |-_t
+|   | `-ParametersAndQualifiers
+|   |   |-(
+|   |   `-)
+|   `-;
 `-SimpleDeclaration
   |-void
   |-SimpleDeclarator
@@ -1208,28 +1296,95 @@ void test() {
   `-CompoundStatement
     |-{
     |-ExpressionStatement
-    | |-IntegerLiteralExpression
-    | | `-12
+    | |-IntegerUserDefinedLiteralExpression
+    | | `-12_i
     | `-;
     |-ExpressionStatement
-    | |-IntegerLiteralExpression
-    | | `-12u
+    | |-FloatUserDefinedLiteralExpression
+    | | `-1.2_f
     | `-;
     |-ExpressionStatement
-    | |-IntegerLiteralExpression
-    | | `-12l
+    | |-CharUserDefinedLiteralExpression
+    | | `-'2'_c
     | `-;
     |-ExpressionStatement
-    | |-IntegerLiteralExpression
-    | | `-12ul
+    | |-UnknownUserDefinedLiteralExpression
+    | | `-12_r
     | `-;
     |-ExpressionStatement
-    | |-IntegerLiteralExpression
-    | | `-014
+    | |-UnknownUserDefinedLiteralExpression
+    | | `-1.2_r
     | `-;
     |-ExpressionStatement
-    | |-IntegerLiteralExpression
-    | | `-0XC
+    | |-UnknownUserDefinedLiteralExpression
+    | | `-12_t
+    | `-;
+    |-ExpressionStatement
+    | |-UnknownUserDefinedLiteralExpression
+    | | `-1.2_t
+    | `-;
+    `-}
+)txt"));
+}
+
+TEST_P(SyntaxTreeTest, UserDefinedLiteralString) {
+  if (!GetParam().isCXX11OrLater()) {
+    return;
+  }
+  EXPECT_TRUE(treeDumpEqual(
+      R"cpp(
+typedef decltype(sizeof(void *)) size_t;
+unsigned operator "" _s(const char*, size_t);
+void test() {
+  "12"_s;// call: operator "" _s("12")       | kind: string
+}
+    )cpp",
+      R"txt(
+*: TranslationUnit
+|-SimpleDeclaration
+| |-typedef
+| |-decltype
+| |-(
+| |-UnknownExpression
+| | |-sizeof
+| | |-(
+| | |-void
+| | |-*
+| | `-)
+| |-)
+| |-SimpleDeclarator
+| | `-size_t
+| `-;
+|-SimpleDeclaration
+| |-unsigned
+| |-SimpleDeclarator
+| | |-operator
+| | |-""
+| | |-_s
+| | `-ParametersAndQualifiers
+| |   |-(
+| |   |-SimpleDeclaration
+| |   | |-const
+| |   | |-char
+| |   | `-SimpleDeclarator
+| |   |   `-*
+| |   |-,
+| |   |-SimpleDeclaration
+| |   | `-size_t
+| |   `-)
+| `-;
+`-SimpleDeclaration
+  |-void
+  |-SimpleDeclarator
+  | |-test
+  | `-ParametersAndQualifiers
+  |   |-(
+  |   `-)
+  `-CompoundStatement
+    |-{
+    |-ExpressionStatement
+    | |-StringUserDefinedLiteralExpression
+    | | `-"12"_s
     | `-;
     `-}
 )txt"));


        


More information about the cfe-commits mailing list