[clang] 93dc73b - [Lexer] Fix bug in `makeFileCharRange` called on split tokens.

Yitzhak Mandelbaum via cfe-commits cfe-commits at lists.llvm.org
Wed Jul 14 07:36:53 PDT 2021


Author: Yitzhak Mandelbaum
Date: 2021-07-14T14:36:31Z
New Revision: 93dc73b1e0f31c712e5b8bbac14491ce55c414ad

URL: https://github.com/llvm/llvm-project/commit/93dc73b1e0f31c712e5b8bbac14491ce55c414ad
DIFF: https://github.com/llvm/llvm-project/commit/93dc73b1e0f31c712e5b8bbac14491ce55c414ad.diff

LOG: [Lexer] Fix bug in `makeFileCharRange` called on split tokens.

When the end location of the specified range is a split token (for example,
the first `>` of a `>>` that the parser has split), `makeFileCharRange` does
not process it correctly.  This patch adds proper support for split tokens.

Differential Revision: https://reviews.llvm.org/D105365

Added: 
    

Modified: 
    clang/lib/Lex/Lexer.cpp
    clang/unittests/Lex/LexerTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index cb2b19b59c4ec..2cc4cae533d07 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -877,6 +877,14 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
   return CharSourceRange::getCharRange(Begin, End);
 }
 
+// Whether `Loc`'s expansion is a token range; assumes `Loc` is in an expansion.
+static bool isInExpansionTokenRange(const SourceLocation Loc,
+                                    const SourceManager &SM) {
+  return SM.getSLocEntry(SM.getFileID(Loc))
+      .getExpansion()
+      .isExpansionTokenRange();
+}
+
 CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
                                          const SourceManager &SM,
                                          const LangOptions &LangOpts) {
@@ -896,10 +904,12 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
   }
 
   if (Begin.isFileID() && End.isMacroID()) {
-    if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts,
-                                                          &End)) ||
-        (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts,
-                                                           &End)))
+    if (Range.isTokenRange()) {
+      if (!isAtEndOfMacroExpansion(End, SM, LangOpts, &End))
+        return {};
+      // Use the *original* end, not the expanded one in `End`.
+      Range.setTokenRange(isInExpansionTokenRange(Range.getEnd(), SM));
+    } else if (!isAtStartOfMacroExpansion(End, SM, LangOpts, &End))
       return {};
     Range.setEnd(End);
     return makeRangeFromFileLocs(Range, SM, LangOpts);
@@ -914,6 +924,9 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
                                                          &MacroEnd)))) {
     Range.setBegin(MacroBegin);
     Range.setEnd(MacroEnd);
+    // Use the *original* `End`, not the expanded one in `MacroEnd`.
+    if (Range.isTokenRange())
+      Range.setTokenRange(isInExpansionTokenRange(End, SM));
     return makeRangeFromFileLocs(Range, SM, LangOpts);
   }
 

diff  --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp
index 4cdabe042cc83..319c63f6a50ba 100644
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@@ -25,6 +25,7 @@
 #include "clang/Lex/PreprocessorOptions.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include <memory>
 #include <vector>
 
 namespace {
@@ -65,7 +66,7 @@ class LexerTest : public ::testing::Test {
 
   std::vector<Token> Lex(StringRef Source) {
     TrivialModuleLoader ModLoader;
-    auto PP = CreatePP(Source, ModLoader);
+    PP = CreatePP(Source, ModLoader);
 
     std::vector<Token> toks;
     while (1) {
@@ -109,6 +110,7 @@ class LexerTest : public ::testing::Test {
   LangOptions LangOpts;
   std::shared_ptr<TargetOptions> TargetOpts;
   IntrusiveRefCntPtr<TargetInfo> Target;
+  std::unique_ptr<Preprocessor> PP;
 };
 
 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
@@ -264,12 +266,14 @@ TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
 
 TEST_F(LexerTest, LexAPI) {
   std::vector<tok::TokenKind> ExpectedTokens;
+  // Line 1 (after the #defines)
   ExpectedTokens.push_back(tok::l_square);
   ExpectedTokens.push_back(tok::identifier);
   ExpectedTokens.push_back(tok::r_square);
   ExpectedTokens.push_back(tok::l_square);
   ExpectedTokens.push_back(tok::identifier);
   ExpectedTokens.push_back(tok::r_square);
+  // Line 2
   ExpectedTokens.push_back(tok::identifier);
   ExpectedTokens.push_back(tok::identifier);
   ExpectedTokens.push_back(tok::identifier);
@@ -357,6 +361,65 @@ TEST_F(LexerTest, LexAPI) {
   EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
 }
 
+TEST_F(LexerTest, HandlesSplitTokens) {
+  std::vector<tok::TokenKind> ExpectedTokens;
+  // Line 1 (after the #defines)
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::less);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::less);
+  ExpectedTokens.push_back(tok::greatergreater);
+  // Line 2
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::less);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::less);
+  ExpectedTokens.push_back(tok::greatergreater);
+
+  std::vector<Token> toks = CheckLex("#define TY ty\n"
+                                     "#define RANGLE ty<ty<>>\n"
+                                     "TY<ty<>>\n"
+                                     "RANGLE",
+                                     ExpectedTokens);
+
+  SourceLocation outerTyLoc = toks[0].getLocation();
+  SourceLocation innerTyLoc = toks[2].getLocation();
+  SourceLocation gtgtLoc = toks[4].getLocation();
+  // Split the token to simulate the action of the parser and force creation of
+  // an `ExpansionTokenRange`.
+  SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1);
+
+  // Verify that it only captures the first greater-than and not the second one.
+  CharSourceRange range = Lexer::makeFileCharRange(
+      CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr,
+      LangOpts);
+  EXPECT_TRUE(range.isCharRange());
+  EXPECT_EQ(range.getAsRange(),
+            SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));
+
+  // Verify case where range begins in a macro expansion.
+  range = Lexer::makeFileCharRange(
+      CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr,
+      LangOpts);
+  EXPECT_TRUE(range.isCharRange());
+  EXPECT_EQ(range.getAsRange(),
+            SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
+                        gtgtLoc.getLocWithOffset(1)));
+
+  SourceLocation macroInnerTyLoc = toks[7].getLocation();
+  SourceLocation macroGtgtLoc = toks[9].getLocation();
+  // Split the token to simulate the action of the parser and force creation of
+  // an `ExpansionTokenRange`.
+  SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1);
+
+  // Verify that it fails (because it only captures the first greater-than and
+  // not the second one, so it doesn't span the entire macro expansion).
+  range = Lexer::makeFileCharRange(
+      CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc),
+      SourceMgr, LangOpts);
+  EXPECT_TRUE(range.isInvalid());
+}
+
 TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
   std::vector<Token> toks =
       Lex("#define helper1 0\n"


        


More information about the cfe-commits mailing list