[clang] [libTooling] Fix `getFileRangeForEdit` for inner nested template types (PR #87673)

Eric Li via cfe-commits cfe-commits at lists.llvm.org
Fri Apr 5 09:57:22 PDT 2024


https://github.com/tJener updated https://github.com/llvm/llvm-project/pull/87673

>From 697551dd155cd8ada0d93ce8aa2951a9f9f3d4b4 Mon Sep 17 00:00:00 2001
From: Eric Li <li.zhe.hua at gmail.com>
Date: Thu, 4 Apr 2024 14:04:25 -0400
Subject: [PATCH 1/3] [libTooling] Fix `getFileRangeForEdit` for inner nested
 template types

When there is `>>` in source from the right brackets of a nested
template, the end location of the inner template points intto a
scratch space with a single `>` token. This prevents the lexer from
seeing the `>>` token in the original source.

However, this causes the range to appear to be partially in a macro,
and is problematic if we are trying to avoid ranges with any macro
expansions.

This change detects this odd case in token ranges, converting it to
the corresponding character range without the expansion.
---
 clang/lib/Tooling/Transformer/SourceCode.cpp | 61 ++++++++++++++++++--
 clang/unittests/Tooling/SourceCodeTest.cpp   | 29 ++++++++++
 2 files changed, 85 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Tooling/Transformer/SourceCode.cpp b/clang/lib/Tooling/Transformer/SourceCode.cpp
index 6aae834b0db563..114f6ba4833cb0 100644
--- a/clang/lib/Tooling/Transformer/SourceCode.cpp
+++ b/clang/lib/Tooling/Transformer/SourceCode.cpp
@@ -101,6 +101,56 @@ static bool spelledInMacroDefinition(SourceLocation Loc,
   return false;
 }
 
+// Returns the expansion loc of `Loc` if `Loc` is the location of the `>` token
+// of an inner nested template, where the inner and outer templates end brackets
+// are spelled as `>>`.
+//
+// Clang handles the `>>` in nested templates by placing the `SourceLocation`
+// of the inner template end bracket in scratch space. This forces it to be a
+// separate token (otherwise it would be lexed as `>>`), but that means it also
+// looks like a macro.
+static std::optional<SourceLocation> getExpansionForNestedTemplateGreater(
+    SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) {
+  if (Loc.isMacroID()) {
+    auto SpellingLoc = SM.getSpellingLoc(Loc);
+    auto ExpansionLoc = SM.getExpansionLoc(Loc);
+    Token SpellingTok, ExpansionTok;
+    if (Lexer::getRawToken(SpellingLoc, SpellingTok, SM, LangOpts,
+                           /*IgnoreWhiteSpace=*/false)) {
+      return std::nullopt;
+    }
+    if (Lexer::getRawToken(ExpansionLoc, ExpansionTok, SM, LangOpts,
+                           /*IgnoreWhiteSpace=*/false)) {
+      return std::nullopt;
+    }
+    if (SpellingTok.getKind() == tok::greater &&
+        ExpansionTok.getKind() == tok::greatergreater) {
+      return ExpansionLoc;
+    }
+  }
+  return std::nullopt;
+}
+
+// Returns `Range`, but adjusted to smooth over oddities introduced by Clang.
+static CharSourceRange
+cleanRangeForAvoidingMacros(CharSourceRange Range, const SourceManager &SM,
+                            const LangOptions &LangOpts) {
+  if (Range.isTokenRange()) {
+    auto B =
+        getExpansionForNestedTemplateGreater(Range.getBegin(), SM, LangOpts);
+    auto E = getExpansionForNestedTemplateGreater(Range.getEnd(), SM, LangOpts);
+    if (E) {
+      // We can't use the expansion location with a token range, because that
+      // will lex as `>>`. So we instead convert to a char range.
+      return CharSourceRange::getCharRange(B.value_or(Range.getBegin()),
+                                           E->getLocWithOffset(1));
+    } else if (B) {
+      return CharSourceRange::getTokenRange(*B, Range.getEnd());
+    }
+  }
+  return Range;
+}
+
 static CharSourceRange getRange(const CharSourceRange &EditRange,
                                 const SourceManager &SM,
                                 const LangOptions &LangOpts,
@@ -109,13 +159,14 @@ static CharSourceRange getRange(const CharSourceRange &EditRange,
   if (IncludeMacroExpansion) {
     Range = Lexer::makeFileCharRange(EditRange, SM, LangOpts);
   } else {
-    if (spelledInMacroDefinition(EditRange.getBegin(), SM) ||
-        spelledInMacroDefinition(EditRange.getEnd(), SM))
+    auto AdjustedRange = cleanRangeForAvoidingMacros(EditRange, SM, LangOpts);
+    if (spelledInMacroDefinition(AdjustedRange.getBegin(), SM) ||
+        spelledInMacroDefinition(AdjustedRange.getEnd(), SM))
       return {};
 
-    auto B = SM.getSpellingLoc(EditRange.getBegin());
-    auto E = SM.getSpellingLoc(EditRange.getEnd());
-    if (EditRange.isTokenRange())
+    auto B = SM.getSpellingLoc(AdjustedRange.getBegin());
+    auto E = SM.getSpellingLoc(AdjustedRange.getEnd());
+    if (AdjustedRange.isTokenRange())
       E = Lexer::getLocForEndOfToken(E, 0, SM, LangOpts);
     Range = CharSourceRange::getCharRange(B, E);
   }
diff --git a/clang/unittests/Tooling/SourceCodeTest.cpp b/clang/unittests/Tooling/SourceCodeTest.cpp
index 3641d2ee453f4a..def9701ccdf1e0 100644
--- a/clang/unittests/Tooling/SourceCodeTest.cpp
+++ b/clang/unittests/Tooling/SourceCodeTest.cpp
@@ -50,6 +50,15 @@ struct CallsVisitor : TestVisitor<CallsVisitor> {
   std::function<void(CallExpr *, ASTContext *Context)> OnCall;
 };
 
+struct TypeLocVisitor : TestVisitor<TypeLocVisitor> {
+  bool VisitTypeLoc(TypeLoc TL) {
+    OnCall(TL, Context);
+    return true;
+  }
+
+  std::function<void(TypeLoc, ASTContext *Context)> OnCall;
+};
+
 // Equality matcher for `clang::CharSourceRange`, which lacks `operator==`.
 MATCHER_P(EqualsRange, R, "") {
   return arg.isTokenRange() == R.isTokenRange() &&
@@ -510,6 +519,26 @@ int c = M3(3);
   Visitor.runOver(Code.code());
 }
 
+TEST(SourceCodeTest, InnerNestedTemplate) {
+  llvm::Annotations Code(R"cc(
+  template <typename T>
+  struct S {};
+
+  void f(S<S<S<int>>>);
+)cc");
+
+  TypeLocVisitor Visitor;
+  Visitor.OnCall = [](TypeLoc TL, ASTContext *Context) {
+    if (TL.getSourceRange().isInvalid())
+      return;
+    auto Range = CharSourceRange::getTokenRange(TL.getSourceRange());
+    EXPECT_TRUE(getFileRangeForEdit(Range, *Context,
+                                    /*IncludeMacroExpansion=*/false))
+        << TL.getSourceRange().printToString(Context->getSourceManager());
+  };
+  Visitor.runOver(Code.code(), TypeLocVisitor::Lang_CXX11);
+}
+
 TEST_P(GetFileRangeForEditTest, EditPartialMacroExpansionShouldFail) {
   std::string Code = R"cpp(
 #define BAR 10+

>From b284e974e547816b4196aaa6d52b3291a9ccfc0a Mon Sep 17 00:00:00 2001
From: Eric Li <li.zhe.hua at gmail.com>
Date: Thu, 4 Apr 2024 16:24:06 -0400
Subject: [PATCH 2/3] fixup! [libTooling] Fix `getFileRangeForEdit` for inner
 nested template types

---
 clang/lib/Tooling/Transformer/SourceCode.cpp | 74 ++++++++++----------
 1 file changed, 36 insertions(+), 38 deletions(-)

diff --git a/clang/lib/Tooling/Transformer/SourceCode.cpp b/clang/lib/Tooling/Transformer/SourceCode.cpp
index 114f6ba4833cb0..ab7184c2c069e7 100644
--- a/clang/lib/Tooling/Transformer/SourceCode.cpp
+++ b/clang/lib/Tooling/Transformer/SourceCode.cpp
@@ -101,51 +101,49 @@ static bool spelledInMacroDefinition(SourceLocation Loc,
   return false;
 }
 
-// Returns the expansion loc of `Loc` if `Loc` is the location of the `>` token
-// of an inner nested template, where the inner and outer templates end brackets
-// are spelled as `>>`.
-//
-// Clang handles the `>>` in nested templates by placing the `SourceLocation`
-// of the inner template end bracket in scratch space. This forces it to be a
-// separate token (otherwise it would be lexed as `>>`), but that means it also
-// looks like a macro.
-static std::optional<SourceLocation> getExpansionForNestedTemplateGreater(
-    SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) {
+// Returns the expansion char-range of `Loc` if `Loc` is a split token. For
+// example, `>>` in nested templates needs the first `>` to be split, otherwise
+// the `SourceLocation` of the token would lex as `>>` instead of `>`.
+static std::optional<CharSourceRange>
+getExpansionForSplitToken(SourceLocation Loc, const SourceManager &SM,
+                          const LangOptions &LangOpts) {
   if (Loc.isMacroID()) {
-    auto SpellingLoc = SM.getSpellingLoc(Loc);
-    auto ExpansionLoc = SM.getExpansionLoc(Loc);
-    Token SpellingTok, ExpansionTok;
-    if (Lexer::getRawToken(SpellingLoc, SpellingTok, SM, LangOpts,
-                           /*IgnoreWhiteSpace=*/false)) {
-      return std::nullopt;
-    }
-    if (Lexer::getRawToken(ExpansionLoc, ExpansionTok, SM, LangOpts,
-                           /*IgnoreWhiteSpace=*/false)) {
+    bool Invalid = false;
+    auto &SLoc = SM.getSLocEntry(SM.getFileID(Loc), &Invalid);
+    if (Invalid)
       return std::nullopt;
-    }
-    if (SpellingTok.getKind() == tok::greater &&
-        ExpansionTok.getKind() == tok::greatergreater) {
-      return ExpansionLoc;
+    if (auto &Expansion = SLoc.getExpansion();
+        !Expansion.isExpansionTokenRange()) {
+      // A char-range expansion is only used where a token-range would be
+      // incorrect, and so identifies this as a split token (and importantly,
+      // not as a macro).
+      return Expansion.getExpansionLocRange();
     }
   }
   return std::nullopt;
 }
 
-// Returns `Range`, but adjusted to smooth over oddities introduced by Clang.
-static CharSourceRange
-cleanRangeForAvoidingMacros(CharSourceRange Range, const SourceManager &SM,
-                            const LangOptions &LangOpts) {
+// If `Range` covers a split token, returns the expansion range, otherwise
+// returns `Range`.
+static CharSourceRange getRangeForSplitTokens(CharSourceRange Range,
+                                              const SourceManager &SM,
+                                              const LangOptions &LangOpts) {
   if (Range.isTokenRange()) {
-    auto B =
-        getExpansionForNestedTemplateGreater(Range.getBegin(), SM, LangOpts);
-    auto E = getExpansionForNestedTemplateGreater(Range.getEnd(), SM, LangOpts);
-    if (E) {
-      // We can't use the expansion location with a token range, because that
-      // will lex as `>>`. So we instead convert to a char range.
-      return CharSourceRange::getCharRange(B.value_or(Range.getBegin()),
-                                           E->getLocWithOffset(1));
-    } else if (B) {
-      return CharSourceRange::getTokenRange(*B, Range.getEnd());
+    auto BeginToken = getExpansionForSplitToken(Range.getBegin(), SM, LangOpts);
+    auto EndToken = getExpansionForSplitToken(Range.getEnd(), SM, LangOpts);
+    if (EndToken) {
+      SourceLocation BeginLoc =
+          BeginToken ? BeginToken->getBegin() : Range.getBegin();
+      // We can't use the expansion location with a token-range, because that
+      // will incorrectly lex the end token, so use a char-range that ends at
+      // the split.
+      return CharSourceRange::getCharRange(BeginLoc, EndToken->getEnd());
+    } else if (BeginToken) {
+      // Since the end token is not split, the whole range covers the split, so
+      // the only adjustment we make is to use the expansion location of the
+      // begin token.
+      return CharSourceRange::getTokenRange(BeginToken->getBegin(),
+                                            Range.getEnd());
     }
   }
   return Range;
@@ -159,7 +157,7 @@ static CharSourceRange getRange(const CharSourceRange &EditRange,
   if (IncludeMacroExpansion) {
     Range = Lexer::makeFileCharRange(EditRange, SM, LangOpts);
   } else {
-    auto AdjustedRange = cleanRangeForAvoidingMacros(EditRange, SM, LangOpts);
+    auto AdjustedRange = getRangeForSplitTokens(EditRange, SM, LangOpts);
     if (spelledInMacroDefinition(AdjustedRange.getBegin(), SM) ||
         spelledInMacroDefinition(AdjustedRange.getEnd(), SM))
       return {};

>From 526d87b762012e4cc22cc635331dea7fd9d99df7 Mon Sep 17 00:00:00 2001
From: Eric Li <li.zhe.hua at gmail.com>
Date: Fri, 5 Apr 2024 12:57:01 -0400
Subject: [PATCH 3/3] fixup! [libTooling] Fix `getFileRangeForEdit` for inner
 nested template types

---
 clang/unittests/Tooling/SourceCodeTest.cpp | 47 ++++++++++++++++++----
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/clang/unittests/Tooling/SourceCodeTest.cpp b/clang/unittests/Tooling/SourceCodeTest.cpp
index def9701ccdf1e0..3c24b6220a224a 100644
--- a/clang/unittests/Tooling/SourceCodeTest.cpp
+++ b/clang/unittests/Tooling/SourceCodeTest.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Tooling/Transformer/SourceCode.h"
 #include "TestVisitor.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
@@ -18,10 +19,12 @@
 #include <gtest/gtest.h>
 
 using namespace clang;
+using namespace clang::ast_matchers;
 
 using llvm::Failed;
 using llvm::Succeeded;
 using llvm::ValueIs;
+using testing::Optional;
 using tooling::getAssociatedRange;
 using tooling::getExtendedRange;
 using tooling::getExtendedText;
@@ -52,11 +55,11 @@ struct CallsVisitor : TestVisitor<CallsVisitor> {
 
 struct TypeLocVisitor : TestVisitor<TypeLocVisitor> {
   bool VisitTypeLoc(TypeLoc TL) {
-    OnCall(TL, Context);
+    OnTypeLoc(TL, Context);
     return true;
   }
 
-  std::function<void(TypeLoc, ASTContext *Context)> OnCall;
+  std::function<void(TypeLoc, ASTContext *Context)> OnTypeLoc;
 };
 
 // Equality matcher for `clang::CharSourceRange`, which lacks `operator==`.
@@ -520,21 +523,49 @@ int c = M3(3);
 }
 
 TEST(SourceCodeTest, InnerNestedTemplate) {
-  llvm::Annotations Code(R"cc(
-  template <typename T>
-  struct S {};
+  llvm::Annotations Code(R"cpp(
+    template <typename T>
+    struct A {};
+    template <typename T>
+    struct B {};
+    template <typename T>
+    struct C {};
 
-  void f(S<S<S<int>>>);
-)cc");
+    void f(A<B<C<int>$r[[>>]]);
+  )cpp");
 
   TypeLocVisitor Visitor;
-  Visitor.OnCall = [](TypeLoc TL, ASTContext *Context) {
+  Visitor.OnTypeLoc = [&](TypeLoc TL, ASTContext *Context) {
     if (TL.getSourceRange().isInvalid())
       return;
+
+    // There are no macros, so every TypeLoc's range should be valid.
     auto Range = CharSourceRange::getTokenRange(TL.getSourceRange());
+    auto LastTokenRange = CharSourceRange::getTokenRange(TL.getEndLoc());
     EXPECT_TRUE(getFileRangeForEdit(Range, *Context,
                                     /*IncludeMacroExpansion=*/false))
         << TL.getSourceRange().printToString(Context->getSourceManager());
+    EXPECT_TRUE(getFileRangeForEdit(LastTokenRange, *Context,
+                                    /*IncludeMacroExpansion=*/false))
+        << TL.getEndLoc().printToString(Context->getSourceManager());
+
+    if (auto matches = match(
+            templateSpecializationTypeLoc(
+                loc(templateSpecializationType(
+                    hasDeclaration(cxxRecordDecl(hasName("A"))))),
+                hasTemplateArgumentLoc(
+                    0, templateArgumentLoc(hasTypeLoc(typeLoc().bind("b"))))),
+            TL, *Context);
+        !matches.empty()) {
+      // A range where the start token is split, but the end token is not.
+      auto OuterTL = TL;
+      auto MiddleTL = *matches[0].getNodeAs<TypeLoc>("b");
+      EXPECT_THAT(
+          getFileRangeForEdit(CharSourceRange::getTokenRange(
+                                  MiddleTL.getEndLoc(), OuterTL.getEndLoc()),
+                              *Context, /*IncludeMacroExpansion=*/false),
+          Optional(EqualsAnnotatedRange(Context, Code.range("r"))));
+    }
   };
   Visitor.runOver(Code.code(), TypeLocVisitor::Lang_CXX11);
 }



More information about the cfe-commits mailing list